diff --git a/demos/demos_databases_apis/microsoft/sentinel/sentinel_graph_examples.ipynb b/demos/demos_databases_apis/microsoft/sentinel/sentinel_graph_examples.ipynb new file mode 100644 index 0000000000..b9a1f89403 --- /dev/null +++ b/demos/demos_databases_apis/microsoft/sentinel/sentinel_graph_examples.ipynb @@ -0,0 +1,333 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Microsoft Sentinel Graph API with Graphistry\n", + "\n", + "This notebook demonstrates how to query Microsoft Sentinel Graph API and visualize threat intelligence data with Graphistry.\n", + "\n", + "## Requirements\n", + "\n", + "```bash\n", + "pip install graphistry[sentinel-graph]\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install dependencies (uncomment if needed)\n", + "# !pip install graphistry[sentinel-graph]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "Import libraries and configure Graphistry." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import graphistry\n", + "from azure.identity import InteractiveBrowserCredential\n", + "\n", + "# Register with Graphistry\n", + "# IMPORTANT: Store credentials securely using environment variables\n", + "graphistry.register(\n", + " api=3,\n", + " protocol=\"https\",\n", + " server=\"hub.graphistry.com\"\n", + " # personal_key_id='YOUR_KEY_ID',\n", + " # personal_key_secret='YOUR_KEY_SECRET'\n", + ")\n", + "\n", + "print(\"\u2713 Graphistry configured\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": "## Discover Available Graph Instances\n\nUse `sentinel_graph_list()` to see what graph instances are available in your tenant. You only need a placeholder `graph_instance` for this call \u2014 the value is not used by the list endpoint." 
+ }, + { + "cell_type": "code", + "source": "g = graphistry.configure_sentinel_graph(\n graph_instance=graph_instance_name,\n credential=credential,\n response_formats=[\"Graph\"] # default; use [\"Table\", \"Graph\"] to also get raw tabular data\n)\n\nprint(f\"\u2713 Sentinel Graph configured for instance: {graph_instance_name}\")", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Interactive browser authentication\n", + "credential = InteractiveBrowserCredential()\n", + "\n", + "# Replace 'YourGraphInstance' with your actual graph instance name\n", + "g = graphistry.configure_sentinel_graph(\n", + " graph_instance='YourGraphInstance',\n", + " credential=credential\n", + ")\n", + "\n", + "print(\"\u2713 Sentinel Graph configured\")" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "source": "query = \"\"\"\nMATCH (n)-[e]->(m)\nRETURN *\nLIMIT 50\n\"\"\"\n\nviz = g.sentinel_graph(query)\nprint(f\"Query returned {len(viz._nodes)} nodes and {len(viz._edges)} edges\")\n\nviz.plot()", + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Basic query to get nodes and edges\n", + "query = \"\"\"\n", + "MATCH (n)-[e]->(m)\n", + "RETURN *\n", + "LIMIT 50\n", + "\"\"\"\n", + "\n", + "viz = g.sentinel_graph(query)\n", + "print(f\"Query returned {len(viz._node)} nodes and {len(viz._edge)} edges\")\n", + "\n", + "viz.plot()" + ] + }, + { + "cell_type": "code", + "metadata": {}, + "source": "print(\"=\" * 80)\nprint(\"NODES\")\nprint(\"=\" * 80)\nprint(f\"Shape: {viz._nodes.shape}\")\nprint(f\"Columns: {list(viz._nodes.columns)}\")\nprint(\"\\nSample nodes:\")\ndisplay(viz._nodes.head(3))\n\nprint(\"\\n\" + \"=\" * 80)\nprint(\"EDGES\")\nprint(\"=\" * 80)\nprint(f\"Shape: {viz._edges.shape}\")\nprint(f\"Columns: 
{list(viz._edges.columns)}\")\nprint(\"\\nSample edges:\")\ndisplay(viz._edges.head(3))", + "outputs": [], + "execution_count": null + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Access node and edge DataFrames\n", + "print(\"=\" * 80)\n", + "print(\"NODES\")\n", + "print(\"=\" * 80)\n", + "print(f\"Shape: {viz._node.shape}\")\n", + "print(f\"Columns: {list(viz._node.columns)}\")\n", + "print(\"\\nSample nodes:\")\n", + "display(viz._node.head(3))\n", + "\n", + "print(\"\\n\" + \"=\" * 80)\n", + "print(\"EDGES\")\n", + "print(\"=\" * 80)\n", + "print(f\"Shape: {viz._edge.shape}\")\n", + "print(f\"Columns: {list(viz._edge.columns)}\")\n", + "print(\"\\nSample edges:\")\n", + "display(viz._edge.head(3))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example 3: Enhanced Visualization\n", + "\n", + "Add visual encodings for better graph exploration." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "styled = (\n", + " viz\n", + " .encode_edge_color('edge', as_categorical=True)\n", + " .encode_point_color('label', as_categorical=True)\n", + " .encode_point_size('label', default_mapping=100)\n", + ")\n", + "\n", + "styled.plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example 4: Query with Filters\n", + "\n", + "Use WHERE clause to filter results." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Query with WHERE clause (adjust property name as needed for your graph)\n", + "filtered_query = \"\"\"\n", + "MATCH (a)-[e]->(b)\n", + "WHERE a.id IS NOT NULL\n", + "RETURN *\n", + "LIMIT 30\n", + "\"\"\"\n", + "\n", + "filtered_viz = g.sentinel_graph(filtered_query)\n", + "print(f\"Found {len(filtered_viz._edge)} edges\")\n", + "\n", + "filtered_viz.plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example 5: Query Nodes Only\n", + "\n", + "Retrieve specific nodes from the graph." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Query nodes only\n", + "nodes_query = \"\"\"\n", + "MATCH (n)\n", + "RETURN n\n", + "LIMIT 20\n", + "\"\"\"\n", + "\n", + "nodes_viz = g.sentinel_graph(nodes_query)\n", + "nodes_viz.plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example 6: Error Handling\n", + "\n", + "Demonstrate robust error handling." + ] + }, + { + "cell_type": "code", + "source": "# Request both Table and Graph formats in a single call\n# Graphistry automatically parses the Graph section for visualization\nboth_formats_viz = g.sentinel_graph(\n \"MATCH (n)-[e]->(m) RETURN * LIMIT 20\",\n response_formats=[\"Table\", \"Graph\"]\n)\n\nprint(f\"Nodes: {len(both_formats_viz._nodes)}, Edges: {len(both_formats_viz._edges)}\")\nboth_formats_viz.plot()", + "metadata": {}, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": "## Requesting Both Graph and Table Formats\n\nPass `response_formats=[\"Table\", \"Graph\"]` to get both structured graph data and the raw tabular rows in a single API call. 
Graphistry will parse the `Graph` section; the `Table` section is available for additional inspection if needed.", + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "try:\n", + " # Invalid query syntax\n", + " bad_query = \"INVALID SYNTAX\"\n", + " result = g.sentinel_graph(bad_query)\n", + "except Exception as e:\n", + " print(f\"Query failed as expected: {type(e).__name__}\")\n", + " print(f\"Error message: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Alternative Authentication: Service Principal\n", + "\n", + "For production environments, use service principal authentication with credentials stored securely." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Uncomment and configure for production use\n", + "#\n", + "# import os\n", + "# \n", + "# g_prod = graphistry.configure_sentinel_graph(\n", + "# graph_instance='YourGraphInstance', # Replace with your graph instance name\n", + "# tenant_id=os.environ.get('AZURE_TENANT_ID'),\n", + "# client_id=os.environ.get('AZURE_CLIENT_ID'),\n", + "# client_secret=os.environ.get('AZURE_CLIENT_SECRET')\n", + "# )\n", + "# \n", + "# result = g_prod.sentinel_graph('MATCH (n) RETURN n LIMIT 10')\n", + "# result.plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Cleanup\n", + "\n", + "Clear cached authentication token." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "g.sentinel_graph_close()\n", + "print(\"\u2713 Sentinel Graph connection closed\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/graphistry/__init__.py b/graphistry/__init__.py index 30c537155c..ffa06e5015 100644 --- a/graphistry/__init__.py +++ b/graphistry/__init__.py @@ -46,6 +46,10 @@ kusto_from_client, kql, kusto_graph, + configure_sentinel_graph, + sentinel_graph_from_credential, + sentinel_graph, + sentinel_graph_close, gsql, gsql_endpoint, cosmos, diff --git a/graphistry/client_session.py b/graphistry/client_session.py index 715eaf789c..c9c900a00a 100644 --- a/graphistry/client_session.py +++ b/graphistry/client_session.py @@ -10,6 +10,7 @@ from . 
import util from .plugins_types.spanner_types import SpannerConfig from .plugins_types.kusto_types import KustoConfig +from .plugins_types.sentinel_graph_types import SentinelGraphConfig @@ -86,6 +87,7 @@ def __init__(self) -> None: # NOTE: These are dataclasses, so we shallow copy them self.kusto: Optional[KustoConfig] = None self.spanner: Optional[SpannerConfig] = None + self.sentinel_graph: Optional[SentinelGraphConfig] = None # TODO: Migrate to a pattern like Kusto or Spanner self._bolt_driver: Optional[Any] = None diff --git a/graphistry/plotter.py b/graphistry/plotter.py index bdf71f3117..b3e6e99464 100644 --- a/graphistry/plotter.py +++ b/graphistry/plotter.py @@ -14,12 +14,13 @@ from .compute.cluster import ClusterMixin from .plugins.kusto import KustoMixin from .plugins.spanner import SpannerMixin +from .plugins.sentinel_graph import SentinelGraphMixin from .client_session import AuthManagerProtocol # NOTE: Cooperative mixins must call: # super().__init__(*a, **kw) in their __init__ method # to pass along args/kwargs to the next mixin in the chain class Plotter( - KustoMixin, SpannerMixin, + KustoMixin, SpannerMixin, SentinelGraphMixin, CosmosMixin, NeptuneMixin, HeterographEmbedModuleMixin, SearchToGraphMixin, @@ -53,6 +54,7 @@ class Plotter( - :py:class:`graphistry.gremlin.NeptuneMixin`: Integrates with AWS Neptune DB. - :py:class:`graphistry.plugins.kusto.KustoMixin`: Integrates with Azure Kusto DB. - :py:class:`graphistry.plugins.spanner.SpannerMixin`: Integrates with Google Spanner DB. + - :py:class:`graphistry.plugins.sentinel_graph.SentinelGraphMixin`: Integrates with Microsoft Sentinel Graph API. Attributes: All attributes are inherited from the mixins and base classes. 
# File: graphistry/plugins/sentinel_graph.py
"""Microsoft Sentinel Graph API plugin: query a graph instance and bind the result to a Plottable."""
import json
import time
import requests
import pandas as pd
from typing import Any, Dict, List, Optional, Union, TYPE_CHECKING
from functools import wraps
from urllib.parse import quote

if TYPE_CHECKING:
    from azure.core.credentials import TokenCredential
else:
    TokenCredential = object

from graphistry.Plottable import Plottable
from graphistry.util import setup_logger
from graphistry.plugins_types.sentinel_graph_types import (
    SentinelGraphConfig,
    SentinelGraphConnectionError,
    SentinelGraphQueryError
)

logger = setup_logger(__name__)


def retry_on_request_exception(func):
    """Decorator for HTTP retry with exponential backoff.

    Retries the wrapped method when it raises
    ``requests.exceptions.RequestException``. The wait before retry ``k``
    (0-based) is ``retry_backoff_factor ** k`` seconds. Other exceptions
    (including ``SentinelGraphQueryError`` raised for non-200 responses)
    propagate immediately and are not retried.

    The wrapped method must live on an object exposing ``_sentinel_graph_config``.

    :raises SentinelGraphConnectionError: when every attempt raised a request exception
    """
    @wraps(func)
    def wrapper(self, *args, **kwargs):
        cfg = self._sentinel_graph_config
        # Always try at least once: with max_retries <= 0 the loop would
        # otherwise never call func and report a failure that never happened.
        attempts = max(1, cfg.max_retries)

        for attempt in range(attempts):
            try:
                return func(self, *args, **kwargs)
            except requests.exceptions.RequestException as e:
                if attempt < attempts - 1:
                    wait_time = cfg.retry_backoff_factor ** attempt
                    # Security: Log exception type but not details (might contain URLs with sensitive data)
                    logger.warning(
                        f"Request failed (attempt {attempt + 1}/{attempts}): "
                        f"{type(e).__name__}. Retrying in {wait_time}s..."
                    )
                    time.sleep(wait_time)

        # Security: Provide generic error message
        raise SentinelGraphConnectionError(
            f"Request failed after {attempts} retries. "
            f"Check network connectivity and endpoint configuration."
        )

    return wrapper


def _normalize_api_endpoint(api_endpoint: str) -> str:
    """Return the bare endpoint host (no scheme, no trailing slash).

    :raises ValueError: if the endpoint uses plain HTTP or is empty
    """
    endpoint = api_endpoint.strip()
    lowered = endpoint.lower()

    # Security: Validate endpoint doesn't use HTTP (scheme match is case-insensitive)
    if lowered.startswith('http://'):
        raise ValueError(
            "HTTP endpoints are not allowed for security reasons. "
            "Please use HTTPS or provide hostname only."
        )

    # Strip only a leading https:// (the request code adds it back)
    if lowered.startswith('https://'):
        endpoint = endpoint[len('https://'):]

    # A trailing slash would produce "https://host//graphs/..."
    endpoint = endpoint.rstrip('/')
    if not endpoint:
        raise ValueError("api_endpoint must be a non-empty hostname.")
    return endpoint


def _build_request_headers(token: str) -> Dict[str, str]:
    """Build the HTTP headers shared by every Sentinel Graph API request."""
    return {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
        "User-Agent": "pygraphistry-sentinel-graph"
    }


def _build_node_record(node_id: Any, labels: Any, properties: Any) -> Dict[str, Any]:
    """Flatten one node into a row: id, first label, all labels, then properties."""
    record: Dict[str, Any] = {"id": node_id}
    record["label"] = labels[0] if labels else None
    record["labels"] = labels
    # NOTE(review): properties are applied last, so a property named "id",
    # "label" or "labels" replaces the structural value - confirm intended.
    # `or {}` tolerates an explicit null "properties" field.
    record.update(properties or {})
    return record


def _build_edge_record(
    source: Any, target: Any, edge_id: Any, labels: Any, properties: Any
) -> Dict[str, Any]:
    """Flatten one edge into a row: endpoints, id, first label, all labels, then properties."""
    record: Dict[str, Any] = {
        "source": source,
        "target": target,
        "id": edge_id,
        "edge": labels[0] if labels else None,
        "labels": labels,
    }
    # NOTE(review): as with nodes, properties may replace structural keys - confirm intended.
    record.update(properties or {})
    return record


class SentinelGraphMixin(Plottable):
    """
    Microsoft Sentinel Graph API integration for graph queries.

    This mixin allows you to query Microsoft Security Platform Graph API
    using GQL (Graph Query Language) and visualize the results with Graphistry.

    Security Notes:
    - Authentication tokens are cached in memory with repr=False to prevent accidental exposure
    - HTTPS is enforced for all API endpoints
    - SSL certificate verification is enabled by default
    - Credentials (client_secret, tokens) are never logged
    - Error messages are sanitized to prevent information disclosure
    - Query content is not logged to prevent exposure of sensitive data
    """

    def configure_sentinel_graph(
        self,
        graph_instance: str,
        credential: Optional["TokenCredential"] = None,
        tenant_id: Optional[str] = None,
        client_id: Optional[str] = None,
        client_secret: Optional[str] = None,
        use_device_auth: bool = False,
        api_endpoint: str = "api.securityplatform.microsoft.com",
        auth_scope: str = "73c2949e-da2d-457a-9607-fcc665198967/.default",
        timeout: int = 60,
        max_retries: int = 3,
        retry_backoff_factor: float = 2.0,
        verify_ssl: bool = True,
        response_formats: Optional[List[str]] = None
    ) -> Plottable:
        """Configure Microsoft Sentinel Graph API connection.

        Sets up the connection parameters for accessing a Sentinel Graph instance.
        Authentication can be done via:
        - Custom credential object (highest priority)
        - Service principal (client_id + client_secret + tenant_id)
        - Device code auth (use_device_auth=True)
        - Interactive browser credential (fallback)

        :param graph_instance: Graph instance name (e.g., "YourGraphInstance")
        :type graph_instance: str
        :param credential: Custom credential object for authentication
        :type credential: Optional[TokenCredential]
        :param tenant_id: Azure AD tenant ID for service principal auth
        :type tenant_id: Optional[str]
        :param client_id: Azure AD application (client) ID
        :type client_id: Optional[str]
        :param client_secret: Azure AD application secret
        :type client_secret: Optional[str]
        :param use_device_auth: Use device code authentication flow
        :type use_device_auth: bool
        :param api_endpoint: API endpoint hostname; a leading ``https://`` and trailing ``/`` are stripped
        :type api_endpoint: str
        :param auth_scope: OAuth scope for authentication
        :type auth_scope: str
        :param timeout: Request timeout in seconds
        :type timeout: int
        :param max_retries: Maximum number of attempts per request (at least one attempt is always made)
        :type max_retries: int
        :param retry_backoff_factor: Exponential backoff factor for retries
        :type retry_backoff_factor: float
        :param verify_ssl: Verify SSL certificates (default: True, recommended for security)
        :type verify_ssl: bool
        :param response_formats: Response formats to request from API (default: ["Graph"])
        :type response_formats: Optional[List[str]]
        :returns: Self for method chaining
        :rtype: Plottable
        :raises ValueError: if ``api_endpoint`` uses plain HTTP or is empty

        **Example: Interactive browser authentication**
            ::

                import graphistry
                g = graphistry.configure_sentinel_graph(
                    graph_instance="YourGraphInstance"
                )

        **Example: Service principal authentication**
            ::

                import graphistry
                g = graphistry.configure_sentinel_graph(
                    graph_instance="YourGraphInstance",
                    tenant_id="your-tenant-id",
                    client_id="your-client-id",
                    client_secret="your-client-secret"
                )

        **Example: Custom scope for different environment**
            ::

                import graphistry
                g = graphistry.configure_sentinel_graph(
                    graph_instance="CustomGraphInstance",
                    auth_scope="custom-scope/.default",
                    api_endpoint="custom.endpoint.com"
                )
        """
        api_endpoint_clean = _normalize_api_endpoint(api_endpoint)

        self.session.sentinel_graph = SentinelGraphConfig(
            graph_instance=graph_instance,
            api_endpoint=api_endpoint_clean,
            auth_scope=auth_scope,
            timeout=timeout,
            max_retries=max_retries,
            retry_backoff_factor=retry_backoff_factor,
            verify_ssl=verify_ssl,
            credential=credential,
            tenant_id=tenant_id,
            client_id=client_id,
            client_secret=client_secret,
            use_device_auth=use_device_auth,
            # Copy so later mutation of the caller's list cannot change the stored config
            response_formats=list(response_formats) if response_formats is not None else ["Graph"]
        )
        return self

    def sentinel_graph_from_credential(
        self,
        credential: "TokenCredential",
        graph_instance: str,
        **kwargs
    ) -> Plottable:
        """Configure Sentinel Graph using an existing credential.

        Use this method when you already have a configured credential
        and want to reuse it with Graphistry.

        :param credential: Pre-configured TokenCredential
        :type credential: TokenCredential
        :param graph_instance: Graph instance name
        :type graph_instance: str
        :param kwargs: Additional configuration options (see configure_sentinel_graph)
        :returns: Self for method chaining
        :rtype: Plottable

        **Example**
            ::

                from azure.identity import DefaultAzureCredential
                import graphistry

                credential = DefaultAzureCredential()
                g = graphistry.sentinel_graph_from_credential(
                    credential,
                    "YourGraphInstance"
                )
        """
        return self.configure_sentinel_graph(
            graph_instance=graph_instance,
            credential=credential,
            **kwargs
        )

    @property
    def _sentinel_graph_config(self) -> SentinelGraphConfig:
        """Get the current Sentinel Graph configuration.

        :raises ValueError: if configure_sentinel_graph() has not been called
        """
        if self.session.sentinel_graph is None:
            raise ValueError(
                "SentinelGraphMixin is not configured. Call configure_sentinel_graph() first."
            )
        return self.session.sentinel_graph

    def sentinel_graph_close(self) -> None:
        """Clear cached authentication token.

        Does nothing when Sentinel Graph has not been configured.

        **Example**
            ::

                import graphistry

                graphistry.configure_sentinel_graph(...)
                # ... perform queries ...
                graphistry.sentinel_graph_close()
        """
        if self.session.sentinel_graph is not None:
            self.session.sentinel_graph._token = None
            self.session.sentinel_graph._token_expiry = None

    def sentinel_graph(
        self,
        query: str,
        language: str = 'GQL',
        response_formats: Optional[List[str]] = None
    ) -> Plottable:
        """Execute graph query and return Plottable with nodes/edges bound.

        This is the main method - handles auth, query execution, and parsing automatically.

        :param query: GQL query string
        :type query: str
        :param language: Query language (default: 'GQL')
        :type language: str
        :param response_formats: Per-call override of the configured response formats
            (e.g. ``["Table", "Graph"]``); ``None`` uses the configured value
        :type response_formats: Optional[List[str]]
        :returns: Plottable with nodes and edges bound
        :rtype: Plottable
        :raises ValueError: if configure_sentinel_graph() has not been called
        :raises SentinelGraphConnectionError: on authentication failure or exhausted retries
        :raises SentinelGraphQueryError: on a non-200 response or an unparseable body

        **Example: Query graph data**
            ::

                import graphistry

                graphistry.configure_sentinel_graph('YourGraphInstance')

                viz = graphistry.sentinel_graph('''
                    MATCH (n)-[e]->(m)
                    RETURN *
                    LIMIT 100
                ''')

                viz.plot()

        **Example: Multiple queries**
            ::

                import graphistry

                graphistry.configure_sentinel_graph('YourGraphInstance')

                # Query 1
                result1 = graphistry.sentinel_graph('MATCH (n) RETURN * LIMIT 10')

                # Query 2
                result2 = graphistry.sentinel_graph('MATCH (a)-[r]->(b) RETURN * LIMIT 20')
        """
        cfg = self._sentinel_graph_config
        if response_formats is not None:
            effective_formats = list(response_formats)
        else:
            effective_formats = cfg.response_formats
        response_bytes = self._sentinel_graph_query(query, language, effective_formats)
        return self._parse_graph_response(response_bytes)

    @retry_on_request_exception
    def _sentinel_graph_query(self, query: str, language: str, response_formats: List[str]) -> bytes:
        """Internal: Execute query and return raw response bytes.

        :raises SentinelGraphQueryError: if the API answers with a non-200 status
        """
        cfg = self._sentinel_graph_config
        token = self._get_auth_token()

        # Percent-encode the instance name so characters such as "/" or "?"
        # cannot change the request path.
        instance = quote(str(cfg.graph_instance), safe='')
        url = f"https://{cfg.api_endpoint}/graphs/graph-instances/{instance}/query"

        payload = {
            "query": query,
            "queryLanguage": language,
            "responseFormats": response_formats
        }

        # Security: Don't log query content (could contain sensitive data)
        logger.debug(f"Executing {language} query against graph instance: {cfg.graph_instance}")

        # Security: Explicit SSL verification
        response = requests.post(
            url,
            headers=_build_request_headers(token),
            json=payload,
            timeout=cfg.timeout,
            verify=cfg.verify_ssl
        )

        if response.status_code != 200:
            # Security: Don't expose raw API error messages which could contain sensitive info
            # Instead provide generic error with status code only
            raise SentinelGraphQueryError(
                f"Query failed with status {response.status_code}. "
                f"Check your query syntax and permissions."
            )

        logger.info(f"Query successful: {len(response.content)} bytes returned")
        return response.content

    def _get_auth_token(self) -> str:
        """Internal: Get or refresh authentication token with 5-minute expiry buffer.

        Credential precedence: explicit credential, service principal,
        device code, then interactive browser (falling back to
        DefaultAzureCredential if the browser credential cannot be built).

        :raises SentinelGraphConnectionError: if a token cannot be obtained
        """
        cfg = self._sentinel_graph_config

        # Check cached token (5 min buffer)
        if cfg._token and cfg._token_expiry:
            time_remaining = cfg._token_expiry - time.time()
            if time_remaining > 300:  # 5 min buffer
                logger.debug(f"Using cached token (expires in {int(time_remaining)}s)")
                return cfg._token

        # Imported lazily so azure-identity is only required when a token is needed
        from azure.identity import (
            ClientSecretCredential,
            DeviceCodeCredential,
            InteractiveBrowserCredential,
            DefaultAzureCredential
        )

        try:
            # Determine credential type
            if cfg.credential is not None:
                logger.debug("Using provided credential")
                credential = cfg.credential
            elif cfg.client_id and cfg.client_secret and cfg.tenant_id:
                logger.debug("Using service principal authentication")
                credential = ClientSecretCredential(
                    tenant_id=cfg.tenant_id,
                    client_id=cfg.client_id,
                    client_secret=cfg.client_secret
                )
            elif cfg.use_device_auth:
                logger.info("Using device code authentication")
                credential = DeviceCodeCredential()
            else:
                logger.debug("Using interactive browser authentication")
                try:
                    credential = InteractiveBrowserCredential()
                except Exception:
                    # Security: Don't log exception details which might contain sensitive info
                    logger.warning(
                        "Interactive browser auth failed. "
                        "Falling back to DefaultAzureCredential"
                    )
                    credential = DefaultAzureCredential()

            # Get token
            token_obj = credential.get_token(cfg.auth_scope)
            token = token_obj.token
            cfg._token = token
            cfg._token_expiry = token_obj.expires_on

            logger.info("Successfully obtained authentication token")
            return token

        except Exception as e:
            # Security: Don't expose credential details or exception messages.
            # `from None` keeps the original exception out of the traceback;
            # only its type name is logged, at debug level.
            logger.debug(f"Authentication failed with {type(e).__name__}")
            raise SentinelGraphConnectionError(
                "Authentication failed. Please verify your credentials, tenant ID, "
                "and that you have the correct permissions for the auth scope."
            ) from None

    def _parse_graph_response(self, response: Union[bytes, dict]) -> Plottable:
        """Internal: Parse response and return Plottable.

        :param response: Raw UTF-8 JSON bytes, or an already-decoded dict
        :raises SentinelGraphQueryError: if the body is not JSON, is not a JSON
            object, or lacks the 'result' key
        """
        if isinstance(response, bytes):
            try:
                parsed = json.loads(response.decode('utf-8'))
            except (json.JSONDecodeError, UnicodeDecodeError) as e:
                raise SentinelGraphQueryError(f"Failed to parse response as JSON: {e}") from e
        else:
            parsed = response

        # A JSON scalar/array would otherwise fail with TypeError/AttributeError below
        if not isinstance(parsed, dict) or "result" not in parsed:
            raise SentinelGraphQueryError(
                "Unexpected response format: missing 'result' key. "
                "Ensure the API endpoint supports the public preview format."
            )

        logger.debug(f"Response correlationId: {parsed.get('correlationId')}")

        nodes_df = self._extract_nodes(parsed)
        edges_df = self._extract_edges(parsed)

        logger.info(f"Extracted {len(nodes_df)} nodes and {len(edges_df)} edges")

        if nodes_df.empty and edges_df.empty:
            logger.warning("No graph data found in response")

        return (
            self.nodes(nodes_df, node='id')
            .edges(edges_df, source='source', destination='target')
        )

    def _extract_nodes(self, data: dict) -> pd.DataFrame:
        """Internal: Extract and deduplicate nodes from response.

        Reads ``result.graph.nodes`` first; only if that yields nothing, scans
        ``result.rawData.tables`` for cells carrying an ``oid`` (cells with
        ``sourceOid``/``targetOid`` are edges and skipped). Duplicate ids keep
        the row with the most non-null columns.

        :returns: DataFrame with at least 'id' and 'label' columns
        """
        nodes_list: List[Dict[str, Any]] = []
        # `or {}` tolerates explicit nulls ("result": null, "graph": null, ...)
        result = data.get("result") or {}

        # Primary path: result.graph.nodes
        try:
            graph_nodes = (result.get("graph") or {}).get("nodes") or []
            for node in graph_nodes:
                if isinstance(node, dict) and node.get("id"):
                    nodes_list.append(_build_node_record(
                        node["id"], node.get("labels", []), node.get("properties")
                    ))
        except Exception as e:
            logger.warning(f"Failed to extract from result.graph.nodes: {e}")

        # Secondary path: result.rawData.tables (table format)
        if not nodes_list:
            try:
                tables = (result.get("rawData") or {}).get("tables") or []
                for table in tables:
                    for row in table.get("rows") or []:
                        for cell in row:
                            if not isinstance(cell, dict):
                                continue
                            if "sourceOid" in cell or "targetOid" in cell:
                                continue  # This is an edge cell
                            oid = cell.get("oid")
                            if not oid:
                                continue
                            nodes_list.append(_build_node_record(
                                oid, cell.get("labels", []), cell.get("properties")
                            ))
            except Exception as e:
                logger.warning(f"Failed to extract from result.rawData.tables: {e}")

        if not nodes_list:
            logger.debug("No nodes found in response")
            return pd.DataFrame(columns=["id", "label"])

        nodes_df = pd.DataFrame(nodes_list)

        if "id" in nodes_df.columns and not nodes_df["id"].isna().all():
            # Rank rows by how many fields they carry so the richest duplicate wins
            nodes_df["_info_count"] = nodes_df.notna().sum(axis=1)
            # Stable sort: among equally rich duplicates the first occurrence is kept
            nodes_df = nodes_df.sort_values("_info_count", ascending=False, kind="mergesort")
            nodes_df = nodes_df.drop_duplicates(subset="id", keep="first")
            nodes_df = nodes_df.drop("_info_count", axis=1)

        return nodes_df.reset_index(drop=True)

    def _extract_edges(self, data: dict) -> pd.DataFrame:
        """Internal: Extract edges from response.

        Reads ``result.graph.edges`` first; only if that yields nothing, scans
        ``result.rawData.tables`` for cells carrying both ``sourceOid`` and
        ``targetOid``. Edges missing either endpoint are skipped.

        :returns: DataFrame with at least 'source' and 'target' columns
        """
        edges_list: List[Dict[str, Any]] = []
        result = data.get("result") or {}

        # Primary path: result.graph.edges
        try:
            graph_edges = (result.get("graph") or {}).get("edges") or []
            for edge in graph_edges:
                if not isinstance(edge, dict):
                    continue
                source = edge.get("sourceId")
                target = edge.get("targetId")
                if not (source and target):
                    continue
                edges_list.append(_build_edge_record(
                    source, target, edge.get("id"),
                    edge.get("labels", []), edge.get("properties")
                ))
        except Exception as e:
            logger.warning(f"Failed to extract from result.graph.edges: {e}")

        # Secondary path: result.rawData.tables (table format)
        if not edges_list:
            try:
                tables = (result.get("rawData") or {}).get("tables") or []
                for table in tables:
                    for row in table.get("rows") or []:
                        for cell in row:
                            if not isinstance(cell, dict):
                                continue
                            source = cell.get("sourceOid")
                            target = cell.get("targetOid")
                            if not (source and target):
                                continue
                            edges_list.append(_build_edge_record(
                                source, target, cell.get("oid"),
                                cell.get("labels", []), cell.get("properties")
                            ))
            except Exception as e:
                logger.warning(f"Failed to extract from result.rawData.tables: {e}")

        if not edges_list:
            logger.debug("No edges found in response")
            return pd.DataFrame(columns=["source", "target"])

        return pd.DataFrame(edges_list).reset_index(drop=True)

    @retry_on_request_exception
    def _sentinel_graph_list_request(self) -> bytes:
        """Internal: Fetch list of graph instances, return raw response bytes.

        :raises SentinelGraphQueryError: if the API answers with a non-200 status
        """
        cfg = self._sentinel_graph_config
        token = self._get_auth_token()
        url = f"https://{cfg.api_endpoint}/graphs/graph-instances"

        logger.debug("Fetching list of graph instances")
        response = requests.get(
            url,
            headers=_build_request_headers(token),
            params={"graphTypes": "Custom"},
            timeout=cfg.timeout,
            verify=cfg.verify_ssl
        )

        if response.status_code != 200:
            raise SentinelGraphQueryError(
                f"Graph list request failed with status {response.status_code}. "
                f"Check permissions for the graph instances endpoint."
            )

        logger.info(f"Graph list fetched: {len(response.content)} bytes")
        return response.content

    def sentinel_graph_list(self) -> pd.DataFrame:
        """List available graph instances from the Sentinel Graph API.

        Returns a DataFrame of available graph instances with their metadata.
        Requires configure_sentinel_graph() to be called first for authentication
        - the graph_instance value is not used by this method, so any placeholder
        string is acceptable.

        :returns: DataFrame with columns including 'name', 'graphDefinitionName', 'instanceStatus'
        :rtype: pd.DataFrame
        :raises SentinelGraphQueryError: on a non-200 response or a body that is
            not a JSON object

        **Example**
            ::

                import graphistry

                graphistry.configure_sentinel_graph(graph_instance="placeholder")
                instances = graphistry.sentinel_graph_list()
                print(instances[['name', 'instanceStatus']])

                # Use a discovered instance for queries
                graphistry.configure_sentinel_graph(
                    graph_instance=instances.iloc[0]['name']
                )
                viz = graphistry.sentinel_graph("MATCH (n)-[e]->(m) RETURN * LIMIT 50")
                viz.plot()
        """
        response_bytes = self._sentinel_graph_list_request()
        try:
            parsed = json.loads(response_bytes.decode("utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError) as e:
            raise SentinelGraphQueryError(f"Failed to parse graph list response as JSON: {e}") from e

        # The instance list is read from the "value" key of a JSON object
        if not isinstance(parsed, dict):
            raise SentinelGraphQueryError(
                "Unexpected graph list response format: expected a JSON object."
            )

        items = parsed.get("value") or []
        if not items:
            logger.info("No graph instances found")
            return pd.DataFrame(columns=["name", "graphDefinitionName", "instanceStatus"])

        return pd.DataFrame(items)


# ---------------------------------------------------------------------------
# File: graphistry/plugins_types/sentinel_graph_types.py
# The same source line begins the next file of the patch. It is reproduced
# unchanged below; the remaining SentinelGraphConfig fields follow on the next
# source line.
# ---------------------------------------------------------------------------
from typing import Any, List, Optional, TYPE_CHECKING
from dataclasses import dataclass, field

if TYPE_CHECKING:
    from azure.core.credentials import TokenCredential
else:
    TokenCredential = Any


class SentinelGraphConnectionError(Exception):
    """Raised when connection to Sentinel Graph API fails"""
    pass


class SentinelGraphQueryError(Exception):
    """Raised when a Sentinel Graph query fails"""
    pass


@dataclass
class SentinelGraphConfig:
    """Configuration for Microsoft Sentinel Graph API connection"""
    graph_instance: str

    # Endpoint configuration
    api_endpoint: str = "api.securityplatform.microsoft.com"
    auth_scope: str = "73c2949e-da2d-457a-9607-fcc665198967/.default"

    # HTTP configuration
timeout: int = 60 + max_retries: int = 3 + retry_backoff_factor: float = 2.0 + verify_ssl: bool = True + + # Authentication options + credential: Optional[TokenCredential] = None + tenant_id: Optional[str] = None + client_id: Optional[str] = None + client_secret: Optional[str] = None + use_device_auth: bool = False + + # Query configuration + response_formats: List[str] = field(default_factory=lambda: ["Graph"]) + + # Internal state (not user-configurable) + _token: Optional[str] = field(default=None, repr=False) + _token_expiry: Optional[float] = field(default=None, repr=False) diff --git a/graphistry/pygraphistry.py b/graphistry/pygraphistry.py index 11e7ddbc4a..99d398003d 100644 --- a/graphistry/pygraphistry.py +++ b/graphistry/pygraphistry.py @@ -2194,7 +2194,69 @@ def kusto_graph(self, graph_name: str, snap_name: Optional[str] = None) -> Plott return cast(Plotter, self._plotter().kusto_graph(graph_name, snap_name)) kusto_graph.__doc__ = Plotter.kusto_graph.__doc__ + # ---- Sentinel Graph API ----------------------------------------------- # + def configure_sentinel_graph( + self, + graph_instance: str, + credential: Optional[Any] = None, + tenant_id: Optional[str] = None, + client_id: Optional[str] = None, + client_secret: Optional[str] = None, + use_device_auth: bool = False, + api_endpoint: str = "api.securityplatform.microsoft.com", + auth_scope: str = "73c2949e-da2d-457a-9607-fcc665198967/.default", + timeout: int = 60, + max_retries: int = 3, + retry_backoff_factor: float = 2.0, + verify_ssl: bool = True, + response_formats: Optional[List] = None + ) -> "GraphistryClient": + self._plotter().configure_sentinel_graph( + graph_instance=graph_instance, + credential=credential, + tenant_id=tenant_id, + client_id=client_id, + client_secret=client_secret, + use_device_auth=use_device_auth, + api_endpoint=api_endpoint, + auth_scope=auth_scope, + timeout=timeout, + max_retries=max_retries, + retry_backoff_factor=retry_backoff_factor, + verify_ssl=verify_ssl, + 
response_formats=response_formats + ) + return self + configure_sentinel_graph.__doc__ = Plotter.configure_sentinel_graph.__doc__ + + def sentinel_graph_from_credential( + self, + credential: Any, + graph_instance: str, + **kwargs + ) -> Plotter: + return cast(Plotter, self._plotter().sentinel_graph_from_credential( + credential, graph_instance, **kwargs + )) + sentinel_graph_from_credential.__doc__ = Plotter.sentinel_graph_from_credential.__doc__ + + def sentinel_graph( + self, + query: str, + language: str = 'GQL', + response_formats: Optional[List] = None + ) -> Plotter: + return cast(Plotter, self._plotter().sentinel_graph(query, language, response_formats)) + sentinel_graph.__doc__ = Plotter.sentinel_graph.__doc__ + + def sentinel_graph_close(self) -> None: + self._plotter().sentinel_graph_close() + sentinel_graph_close.__doc__ = Plotter.sentinel_graph_close.__doc__ + + def sentinel_graph_list(self) -> "pd.DataFrame": + return self._plotter().sentinel_graph_list() + sentinel_graph_list.__doc__ = Plotter.sentinel_graph_list.__doc__ def gsql_endpoint(self, method_name, args={}, bindings=None, db=None, dry_run=False @@ -2736,6 +2798,11 @@ def _handle_api_response(self, response): kusto_from_client = PyGraphistry.kusto_from_client kql = PyGraphistry.kql kusto_graph = PyGraphistry.kusto_graph +configure_sentinel_graph = PyGraphistry.configure_sentinel_graph +sentinel_graph_from_credential = PyGraphistry.sentinel_graph_from_credential +sentinel_graph = PyGraphistry.sentinel_graph +sentinel_graph_close = PyGraphistry.sentinel_graph_close +sentinel_graph_list = PyGraphistry.sentinel_graph_list cosmos = PyGraphistry.cosmos neptune = PyGraphistry.neptune gremlin = PyGraphistry.gremlin diff --git a/graphistry/tests/fixtures/__init__.py b/graphistry/tests/fixtures/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/graphistry/tests/fixtures/sentinel_graph_responses.py b/graphistry/tests/fixtures/sentinel_graph_responses.py new file mode 100644 index 
0000000000..68213f3d67 --- /dev/null +++ b/graphistry/tests/fixtures/sentinel_graph_responses.py @@ -0,0 +1,272 @@ +""" +Test fixtures for Microsoft Sentinel Graph API responses. +Matches the public preview API format: + https://learn.microsoft.com/en-us/azure/sentinel/datalake/graph-rest-api +""" + +from typing import Any, Dict, List + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _make_node(node_id: str, labels: List[str], properties: Dict[str, Any]) -> Dict[str, Any]: + return {"id": node_id, "labels": labels, "properties": properties} + + +def _make_edge( + edge_id: str, + source_id: str, + target_id: str, + labels: List[str], + properties: Dict[str, Any] +) -> Dict[str, Any]: + return { + "id": edge_id, + "sourceId": source_id, + "targetId": target_id, + "labels": labels, + "properties": properties, + } + + +def _wrap_graph_response( + nodes: List[Dict[str, Any]], + edges: List[Dict[str, Any]], + raw_tables: List[Dict[str, Any]] = None +) -> Dict[str, Any]: + return { + "status": 200, + "result": { + "graph": {"nodes": nodes, "edges": edges}, + "rawData": {"tables": raw_tables or []}, + }, + "correlationId": "test-correlation-id-0000", + } + + +# --------------------------------------------------------------------------- +# Graph-format fixtures +# --------------------------------------------------------------------------- + +def get_minimal_response() -> Dict[str, Any]: + """1 node, 0 edges""" + nodes = [ + _make_node("node-001", ["Device"], {"hostname": "laptop-01", "os": "Windows 11"}), + ] + return _wrap_graph_response(nodes, []) + + +def get_simple_graph_response() -> Dict[str, Any]: + """3 nodes (A, B, C) and 2 edges (A->B, B->C)""" + nodes = [ + _make_node("node-a", ["User"], {"name": "Alice", "department": "Engineering"}), + _make_node("node-b", ["Group"], {"name": "Admins", "memberCount": 5}), + _make_node("node-c", 
["Resource"], {"name": "FileShare", "path": "/data"}), + ] + edges = [ + _make_edge("edge-ab", "node-a", "node-b", ["MemberOf"], {"since": "2024-01-01"}), + _make_edge("edge-bc", "node-b", "node-c", ["HasAccess"], {"permission": "read"}), + ] + return _wrap_graph_response(nodes, edges) + + +def get_duplicate_nodes_response() -> Dict[str, Any]: + """Same node id appears twice; the more-complete record should be kept.""" + nodes = [ + # Sparse first occurrence + _make_node("node-dup", ["User"], {"name": "Bob"}), + # Richer second occurrence (more properties) + _make_node("node-dup", ["User"], {"name": "Bob", "email": "bob@contoso.com", "department": "IT"}), + _make_node("node-other", ["User"], {"name": "Carol"}), + ] + edges = [ + _make_edge("edge-001", "node-dup", "node-other", ["Knows"], {}), + ] + return _wrap_graph_response(nodes, edges) + + +def get_malformed_response() -> Dict[str, Any]: + """One node entry is missing the required 'id' field and should be skipped.""" + nodes = [ + _make_node("node-valid", ["User"], {"name": "Dave"}), + # Missing 'id' — parser should skip this + {"labels": ["Broken"], "properties": {"name": "No ID here"}}, + ] + edges = [ + _make_edge("edge-001", "node-valid", "node-valid", ["SelfLoop"], {}), + ] + return _wrap_graph_response(nodes, edges) + + +def get_empty_response() -> Dict[str, Any]: + """Valid response envelope with no nodes or edges.""" + return _wrap_graph_response([], []) + + +def get_complex_graph_response() -> Dict[str, Any]: + """Multiple node types and edge types with rich properties.""" + nodes = [ + _make_node("user-001", ["User"], { + "name": "Aino Rebane", + "email": "aino.rebane@contoso.com", + "department": "Engineering", + "jobTitle": "Senior Engineer", + }), + _make_node("user-002", ["User"], { + "name": "Marco Silva", + "email": "marco.silva@contoso.com", + "department": "Security", + }), + _make_node("group-001", ["Group"], { + "name": "Administrators", + "description": "System administrators", + 
"memberCount": 25, + }), + _make_node("resource-001", ["Resource", "FileShare"], { + "name": "FinanceData", + "path": "/shares/finance", + "sensitivity": "Confidential", + }), + _make_node("device-001", ["Device"], { + "hostname": "workstation-42", + "os": "Windows 11", + "lastSeen": "2024-03-01T12:00:00Z", + }), + ] + edges = [ + _make_edge("e-001", "user-001", "group-001", ["MemberOf"], {"assignedDate": "2024-01-15"}), + _make_edge("e-002", "user-002", "group-001", ["MemberOf"], {"assignedDate": "2024-02-01"}), + _make_edge("e-003", "group-001", "resource-001", ["HasAccess"], {"permission": "full"}), + _make_edge("e-004", "user-001", "device-001", ["Uses"], {"primary": True}), + _make_edge("e-005", "user-001", "user-002", ["CollaboratesWith"], {"projectCount": 3}), + ] + return _wrap_graph_response(nodes, edges) + + +def get_edge_only_response() -> Dict[str, Any]: + """Edges referencing node IDs that are not present in the nodes list.""" + edges = [ + _make_edge("orphan-edge-001", "ghost-node-a", "ghost-node-b", ["Relates"], {}), + ] + return _wrap_graph_response([], edges) + + +def get_response_with_special_characters() -> Dict[str, Any]: + """Node and edge properties containing unicode, special chars, and emoji.""" + nodes = [ + _make_node("node-unicode", ["User"], { + "name": "Jose Garcia", + "city": "Sao Paulo", + "notes": "resume & Japanese test", + "path": "C:\\Users\\test\\file.txt", + "json_field": '{"key": "value with quotes"}', + }), + ] + edges = [ + _make_edge("edge-unicode", "node-unicode", "node-unicode", ["SelfRef"], { + "description": "Edge with Chinese and Arabic text", + }), + ] + return _wrap_graph_response(nodes, edges) + + +def get_response_with_null_properties() -> Dict[str, Any]: + """Properties dict may contain None values.""" + nodes = [ + _make_node("node-nulls", ["User"], { + "name": "Eve", + "email": None, + "department": None, + "role": "analyst", + }), + ] + edges = [ + _make_edge("edge-nulls", "node-nulls", "node-nulls", 
["SelfLoop"], { + "weight": None, + "label": "test", + }), + ] + return _wrap_graph_response(nodes, edges) + + +# --------------------------------------------------------------------------- +# Table-format fixture (rawData.tables secondary path) +# --------------------------------------------------------------------------- + +def get_table_format_response() -> Dict[str, Any]: + """Response using rawData table format only (graph section is empty). + + Note: table-format edges use 'sourceOid'/'targetOid', not 'sourceId'/'targetId'. + """ + tables = [ + { + "tableName": "PrimaryResult", + "columns": [ + {"columnName": "n", "dataType": "dynamic"}, + {"columnName": "r", "dataType": "dynamic"}, + {"columnName": "m", "dataType": "dynamic"}, + ], + "rows": [ + [ + { + "oid": "table-node-001", + "labels": ["User"], + "properties": {"name": "Alice", "department": "Engineering"}, + }, + { + "oid": "table-edge-001", + "labels": ["HasRole"], + "sourceOid": "table-node-001", + "targetOid": "table-node-002", + "properties": {"assignedDate": "2024-01-15"}, + }, + { + "oid": "table-node-002", + "labels": ["Group"], + "properties": {"name": "Administrators", "memberCount": 25}, + }, + ] + ], + } + ] + return _wrap_graph_response([], [], raw_tables=tables) + + +# --------------------------------------------------------------------------- +# Graph list endpoint fixture +# --------------------------------------------------------------------------- + +def get_graph_list_response() -> Dict[str, Any]: + """Response from GET /graphs/graph-instances?graphTypes=Custom""" + return { + "value": [ + { + "name": "TestGraph", + "mapFileName": None, + "mapFileVersion": None, + "graphDefinitionName": "TestDefinition", + "graphDefinitionVersion": "1.0", + "refreshFrequency": "PT1H", + "createTime": "2024-01-01T00:00:00Z", + "lastUpdateTime": "2024-03-01T12:00:00Z", + "lastSnapshotTime": "2024-03-01T11:00:00Z", + "lastSnapshotRequestTime": "2024-03-01T10:55:00Z", + "instanceStatus": "Ready", + }, + { 
+ "name": "StagingGraph", + "mapFileName": None, + "mapFileVersion": None, + "graphDefinitionName": "StagingDefinition", + "graphDefinitionVersion": "0.9", + "refreshFrequency": "PT6H", + "createTime": "2024-03-01T08:00:00Z", + "lastUpdateTime": "2024-03-01T08:00:00Z", + "lastSnapshotTime": None, + "lastSnapshotRequestTime": None, + "instanceStatus": "Creating", + }, + ] + } diff --git a/graphistry/tests/plugins/test_sentinel_graph.py b/graphistry/tests/plugins/test_sentinel_graph.py new file mode 100644 index 0000000000..6d2ecad937 --- /dev/null +++ b/graphistry/tests/plugins/test_sentinel_graph.py @@ -0,0 +1,706 @@ +import pytest +import json +from unittest.mock import Mock, patch +import pandas as pd +from datetime import datetime, timedelta +import requests + +import graphistry +from graphistry.plugins.sentinel_graph import SentinelGraphMixin +from graphistry.plugins_types.sentinel_graph_types import ( + SentinelGraphConfig, + SentinelGraphConnectionError, + SentinelGraphQueryError +) +from graphistry.tests.fixtures.sentinel_graph_responses import ( + get_minimal_response, + get_simple_graph_response, + get_duplicate_nodes_response, + get_malformed_response, + get_empty_response, + get_complex_graph_response, + get_edge_only_response, + get_response_with_special_characters, + get_response_with_null_properties, + get_graph_list_response, + get_table_format_response, +) + + +SAMPLE_RESPONSE_FULL = get_simple_graph_response() # 3 nodes (node-a, node-b, node-c), 2 edges +SAMPLE_RESPONSE_EMPTY = get_empty_response() +SAMPLE_RESPONSE_MALFORMED = get_malformed_response() + + +class TestSentinelGraphConfiguration: + """Test configuration and setup methods""" + + def test_configure_with_defaults(self): + """Test basic configuration with default values""" + g = graphistry.bind() + result = g.configure_sentinel_graph(graph_instance="TestInstance") + + assert g.session.sentinel_graph is not None + assert g.session.sentinel_graph.graph_instance == "TestInstance" + assert 
g.session.sentinel_graph.api_endpoint == "api.securityplatform.microsoft.com" + assert g.session.sentinel_graph.timeout == 60 + assert g.session.sentinel_graph.max_retries == 3 + assert g.session.sentinel_graph.response_formats == ["Graph"] + assert result is g # Check method chaining + + def test_configure_with_custom_params(self): + """Test configuration with custom parameters""" + g = graphistry.bind() + g.configure_sentinel_graph( + graph_instance="CustomInstance", + api_endpoint="custom.endpoint.com", + auth_scope="custom-scope/.default", + timeout=120, + max_retries=5, + retry_backoff_factor=3.0 + ) + + cfg = g.session.sentinel_graph + assert cfg.graph_instance == "CustomInstance" + assert cfg.api_endpoint == "custom.endpoint.com" + assert cfg.auth_scope == "custom-scope/.default" + assert cfg.timeout == 120 + assert cfg.max_retries == 5 + assert cfg.retry_backoff_factor == 3.0 + + def test_configure_with_custom_response_formats(self): + """Test configuration with custom response_formats""" + g = graphistry.bind() + g.configure_sentinel_graph( + graph_instance="TestInstance", + response_formats=["Table", "Graph"] + ) + assert g.session.sentinel_graph.response_formats == ["Table", "Graph"] + + def test_configure_with_service_principal(self): + """Test configuration with service principal credentials""" + g = graphistry.bind() + g.configure_sentinel_graph( + graph_instance="TestInstance", + tenant_id="test-tenant", + client_id="test-client", + client_secret="test-secret" + ) + + cfg = g.session.sentinel_graph + assert cfg.tenant_id == "test-tenant" + assert cfg.client_id == "test-client" + assert cfg.client_secret == "test-secret" + + def test_sentinel_graph_from_credential(self): + """Test configuration using existing credential""" + mock_credential = Mock() + g = graphistry.bind() + result = g.sentinel_graph_from_credential( + mock_credential, + "TestInstance" + ) + + assert g.session.sentinel_graph.credential is mock_credential + assert 
g.session.sentinel_graph.graph_instance == "TestInstance" + assert result is g + + def test_config_not_configured_error(self): + """Test error when accessing config before configuration""" + from graphistry.plotter import Plotter + from graphistry.pygraphistry import PyGraphistry + g = Plotter(pygraphistry=PyGraphistry) + g.session.sentinel_graph = None + with pytest.raises(ValueError, match="not configured"): + _ = g._sentinel_graph_config + + def test_sentinel_graph_close(self): + """Test closing and clearing token cache""" + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + g.session.sentinel_graph._token = "test-token" + g.session.sentinel_graph._token_expiry = 12345.0 + + g.sentinel_graph_close() + + assert g.session.sentinel_graph._token is None + assert g.session.sentinel_graph._token_expiry is None + + +class TestAuthenticationToken: + """Test authentication token retrieval and caching""" + + @patch('azure.identity.InteractiveBrowserCredential') + def test_get_auth_token_interactive(self, mock_cred_class): + """Test token retrieval with interactive browser credential""" + mock_token = Mock() + mock_token.token = "test-token-123" + mock_token.expires_on = (datetime.now() + timedelta(hours=1)).timestamp() + + mock_credential = Mock() + mock_credential.get_token.return_value = mock_token + mock_cred_class.return_value = mock_credential + + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + token = g._get_auth_token() + + assert token == "test-token-123" + assert g.session.sentinel_graph._token == "test-token-123" + mock_credential.get_token.assert_called_once_with( + "73c2949e-da2d-457a-9607-fcc665198967/.default" + ) + + @patch('azure.identity.ClientSecretCredential') + def test_get_auth_token_service_principal(self, mock_cred_class): + """Test token retrieval with service principal""" + mock_token = Mock() + mock_token.token = "sp-token-456" + mock_token.expires_on = (datetime.now() + 
timedelta(hours=1)).timestamp() + + mock_credential = Mock() + mock_credential.get_token.return_value = mock_token + mock_cred_class.return_value = mock_credential + + g = graphistry.bind() + g.configure_sentinel_graph( + graph_instance="TestInstance", + tenant_id="tenant", + client_id="client", + client_secret="secret" + ) + + token = g._get_auth_token() + + assert token == "sp-token-456" + mock_cred_class.assert_called_once_with( + tenant_id="tenant", + client_id="client", + client_secret="secret" + ) + + def test_token_caching(self): + """Test that valid tokens are cached and reused""" + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + future_time = (datetime.now() + timedelta(hours=1)).timestamp() + g.session.sentinel_graph._token = "cached-token" + g.session.sentinel_graph._token_expiry = future_time + + with patch('azure.identity.InteractiveBrowserCredential') as mock_cred: + token = g._get_auth_token() + + assert token == "cached-token" + mock_cred.assert_not_called() + + def test_token_refresh_when_expired(self): + """Test that expired tokens trigger refresh""" + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + past_time = (datetime.now() - timedelta(hours=1)).timestamp() + g.session.sentinel_graph._token = "expired-token" + g.session.sentinel_graph._token_expiry = past_time + + mock_token = Mock() + mock_token.token = "new-token" + mock_token.expires_on = (datetime.now() + timedelta(hours=1)).timestamp() + + with patch('azure.identity.InteractiveBrowserCredential') as mock_cred_class: + mock_credential = Mock() + mock_credential.get_token.return_value = mock_token + mock_cred_class.return_value = mock_credential + + token = g._get_auth_token() + + assert token == "new-token" + assert g.session.sentinel_graph._token == "new-token" + + +class TestQueryExecution: + """Test query execution and HTTP handling""" + + @patch('graphistry.plugins.sentinel_graph.requests.post') + 
@patch.object(SentinelGraphMixin, '_get_auth_token') + def test_execute_query_success(self, mock_auth, mock_post): + """Test successful query execution""" + mock_auth.return_value = "test-token" + + mock_response = Mock() + mock_response.status_code = 200 + mock_response.content = json.dumps(SAMPLE_RESPONSE_FULL).encode('utf-8') + mock_post.return_value = mock_response + + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + result = g._sentinel_graph_query("MATCH (n) RETURN n", "GQL", ["Graph"]) + + assert result == mock_response.content + mock_post.assert_called_once() + call_kwargs = mock_post.call_args[1] + assert call_kwargs['json']['query'] == "MATCH (n) RETURN n" + assert call_kwargs['json']['queryLanguage'] == "GQL" + assert call_kwargs['json']['responseFormats'] == ["Graph"] + assert call_kwargs['headers']['Authorization'] == "Bearer test-token" + assert call_kwargs['timeout'] == 60 + + @patch('graphistry.plugins.sentinel_graph.requests.post') + @patch.object(SentinelGraphMixin, '_get_auth_token') + def test_execute_query_http_error(self, mock_auth, mock_post): + """Test query execution with HTTP error""" + mock_auth.return_value = "test-token" + + mock_response = Mock() + mock_response.status_code = 400 + mock_post.return_value = mock_response + + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + with pytest.raises(SentinelGraphQueryError, match="400"): + g._sentinel_graph_query("INVALID QUERY", "GQL", ["Graph"]) + + @patch('graphistry.plugins.sentinel_graph.requests.post') + @patch('time.sleep') + @patch.object(SentinelGraphMixin, '_get_auth_token') + def test_execute_query_retry_on_timeout(self, mock_auth, mock_sleep, mock_post): + """Test retry logic on timeout""" + mock_auth.return_value = "test-token" + + mock_post.side_effect = [ + requests.exceptions.Timeout("Timeout 1"), + requests.exceptions.Timeout("Timeout 2"), + Mock(status_code=200, 
content=json.dumps(SAMPLE_RESPONSE_FULL).encode()) + ] + + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance", max_retries=3) + + g._sentinel_graph_query("MATCH (n) RETURN n", "GQL", ["Graph"]) + + assert mock_post.call_count == 3 + assert mock_sleep.call_count == 2 + + @patch('graphistry.plugins.sentinel_graph.requests.post') + @patch('time.sleep') + @patch.object(SentinelGraphMixin, '_get_auth_token') + def test_execute_query_max_retries_exceeded(self, mock_auth, mock_sleep, mock_post): + """Test failure after max retries""" + mock_auth.return_value = "test-token" + mock_post.side_effect = requests.exceptions.ConnectionError("Connection failed") + + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance", max_retries=3) + + with pytest.raises(SentinelGraphConnectionError, match="3 retries"): + g._sentinel_graph_query("MATCH (n) RETURN n", "GQL", ["Graph"]) + + assert mock_post.call_count == 3 + + @patch.object(SentinelGraphMixin, '_sentinel_graph_query') + @patch.object(SentinelGraphMixin, '_parse_graph_response') + def test_sentinel_graph_main_method(self, mock_parse, mock_query): + """Test main sentinel_graph method threads response_formats""" + mock_query.return_value = b'test-response' + mock_parse.return_value = Mock() + + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + result = g.sentinel_graph("MATCH (n) RETURN n") + + mock_query.assert_called_once_with("MATCH (n) RETURN n", 'GQL', ["Graph"]) + mock_parse.assert_called_once_with(b'test-response') + assert result is mock_parse.return_value + + +class TestResponseFormats: + """Test response_formats parameter threading""" + + @patch('graphistry.plugins.sentinel_graph.requests.post') + @patch.object(SentinelGraphMixin, '_get_auth_token') + def test_default_format_is_graph(self, mock_auth, mock_post): + """Default responseFormats should be ["Graph"]""" + mock_auth.return_value = "test-token" + mock_post.return_value = 
Mock( + status_code=200, + content=json.dumps(SAMPLE_RESPONSE_EMPTY).encode() + ) + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + g.sentinel_graph("MATCH (n) RETURN n") + payload = mock_post.call_args[1]['json'] + assert payload['responseFormats'] == ["Graph"] + + @patch('graphistry.plugins.sentinel_graph.requests.post') + @patch.object(SentinelGraphMixin, '_get_auth_token') + def test_custom_format_passed_through(self, mock_auth, mock_post): + """Custom response_formats should be sent to the API""" + mock_auth.return_value = "test-token" + mock_post.return_value = Mock( + status_code=200, + content=json.dumps(SAMPLE_RESPONSE_EMPTY).encode() + ) + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + g.sentinel_graph("MATCH (n) RETURN n", response_formats=["Table", "Graph"]) + payload = mock_post.call_args[1]['json'] + assert payload['responseFormats'] == ["Table", "Graph"] + + @patch('graphistry.plugins.sentinel_graph.requests.post') + @patch.object(SentinelGraphMixin, '_get_auth_token') + def test_format_configured_at_configure_time(self, mock_auth, mock_post): + """response_formats set during configure_sentinel_graph should be used""" + mock_auth.return_value = "test-token" + mock_post.return_value = Mock( + status_code=200, + content=json.dumps(SAMPLE_RESPONSE_EMPTY).encode() + ) + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance", response_formats=["Table"]) + g.sentinel_graph("MATCH (n) RETURN n") + payload = mock_post.call_args[1]['json'] + assert payload['responseFormats'] == ["Table"] + + +class TestResponseParsing: + """Test node and edge extraction from various response formats""" + + def test_extract_nodes_full_response(self): + """Test node extraction from complete response""" + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + nodes_df = g._extract_nodes(SAMPLE_RESPONSE_FULL) + + assert len(nodes_df) == 3 + assert 'id' 
in nodes_df.columns + assert 'label' in nodes_df.columns + assert set(nodes_df['id']) == {'node-a', 'node-b', 'node-c'} + + def test_extract_nodes_labels_mapped(self): + """Test that labels list is mapped to label column""" + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + nodes_df = g._extract_nodes(SAMPLE_RESPONSE_FULL) + node_a = nodes_df[nodes_df['id'] == 'node-a'].iloc[0] + assert node_a['label'] == 'User' + assert node_a['labels'] == ['User'] + + def test_extract_nodes_properties_spread(self): + """Test that node properties are spread as top-level columns""" + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + nodes_df = g._extract_nodes(SAMPLE_RESPONSE_FULL) + node_a = nodes_df[nodes_df['id'] == 'node-a'].iloc[0] + assert node_a['name'] == 'Alice' + assert node_a['department'] == 'Engineering' + + def test_extract_nodes_deduplication(self): + """Test node deduplication keeps most complete record""" + duplicate_response = get_duplicate_nodes_response() + + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + nodes_df = g._extract_nodes(duplicate_response) + + # 3 entries in fixture (node-dup x2, node-other x1) -> 2 unique IDs after dedup + assert len(nodes_df) == 2 + assert set(nodes_df['id'].unique()) == {'node-dup', 'node-other'} + # The richer record (with email + department) should be kept + dup_row = nodes_df[nodes_df['id'] == 'node-dup'].iloc[0] + assert dup_row.get('email') == 'bob@contoso.com' + + def test_extract_nodes_malformed_skips_missing_id(self): + """Node entry missing 'id' should be skipped""" + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + nodes_df = g._extract_nodes(SAMPLE_RESPONSE_MALFORMED) + + # Only 'node-valid' should be present; the entry without 'id' is skipped + assert len(nodes_df) == 1 + assert nodes_df.iloc[0]['id'] == 'node-valid' + + def test_extract_nodes_empty_response(self): + 
"""Test extraction from empty response""" + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + nodes_df = g._extract_nodes(SAMPLE_RESPONSE_EMPTY) + + assert len(nodes_df) == 0 + assert 'id' in nodes_df.columns + assert 'label' in nodes_df.columns + + def test_extract_edges_full_response(self): + """Test edge extraction from complete response""" + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + edges_df = g._extract_edges(SAMPLE_RESPONSE_FULL) + + assert len(edges_df) == 2 + edge_ab = edges_df[edges_df['source'] == 'node-a'].iloc[0] + assert edge_ab['target'] == 'node-b' + assert edge_ab['edge'] == 'MemberOf' + + edge_bc = edges_df[edges_df['source'] == 'node-b'].iloc[0] + assert edge_bc['target'] == 'node-c' + assert edge_bc['edge'] == 'HasAccess' + + def test_extract_edges_properties_spread(self): + """Test that edge properties are spread as top-level columns""" + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + edges_df = g._extract_edges(SAMPLE_RESPONSE_FULL) + edge_ab = edges_df[edges_df['source'] == 'node-a'].iloc[0] + assert edge_ab['since'] == '2024-01-01' + + def test_extract_edges_only_response(self): + """Test edge extraction when no nodes are present""" + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + edges_df = g._extract_edges(get_edge_only_response()) + + assert len(edges_df) == 1 + assert edges_df.iloc[0]['source'] == 'ghost-node-a' + assert edges_df.iloc[0]['target'] == 'ghost-node-b' + + def test_extract_edges_empty_response(self): + """Test edge extraction from empty response""" + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + edges_df = g._extract_edges(SAMPLE_RESPONSE_EMPTY) + + assert len(edges_df) == 0 + assert 'source' in edges_df.columns + assert 'target' in edges_df.columns + + def test_extract_nodes_minimal(self): + """Test minimal response with 1 node""" 
+ g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + nodes_df = g._extract_nodes(get_minimal_response()) + + assert len(nodes_df) == 1 + assert nodes_df.iloc[0]['id'] == 'node-001' + assert nodes_df.iloc[0]['label'] == 'Device' + + def test_null_properties_preserved(self): + """None values in properties are passed through""" + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + nodes_df = g._extract_nodes(get_response_with_null_properties()) + + assert len(nodes_df) == 1 + node = nodes_df.iloc[0] + assert node['name'] == 'Eve' + assert node['role'] == 'analyst' + + +class TestTableFormatParsing: + """Test rawData.tables secondary path (table format responses)""" + + def test_extract_nodes_from_table_format(self): + """Nodes should be extracted from rawData.tables when graph section is empty""" + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + response = get_table_format_response() + nodes_df = g._extract_nodes(response) + + assert len(nodes_df) == 2 + assert set(nodes_df['id']) == {'table-node-001', 'table-node-002'} + + def test_extract_edges_from_table_format(self): + """Edges should be extracted from rawData.tables using sourceOid/targetOid""" + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + response = get_table_format_response() + edges_df = g._extract_edges(response) + + assert len(edges_df) == 1 + assert edges_df.iloc[0]['source'] == 'table-node-001' + assert edges_df.iloc[0]['target'] == 'table-node-002' + assert edges_df.iloc[0]['edge'] == 'HasRole' + + def test_table_format_node_labels_mapped(self): + """Table format nodes should have label mapped from labels[0]""" + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + response = get_table_format_response() + nodes_df = g._extract_nodes(response) + + node_001 = nodes_df[nodes_df['id'] == 'table-node-001'].iloc[0] + assert 
node_001['label'] == 'User' + + +class TestGraphConversion: + """Test full graph conversion workflow""" + + def test_convert_bytes_response(self): + """Test conversion from bytes response""" + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + response_bytes = json.dumps(SAMPLE_RESPONSE_FULL).encode('utf-8') + result = g._parse_graph_response(response_bytes) + + assert result._nodes is not None + assert result._edges is not None + assert len(result._nodes) == 3 + assert len(result._edges) == 2 + + def test_convert_dict_response(self): + """Test conversion from dict response""" + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + result = g._parse_graph_response(SAMPLE_RESPONSE_FULL) + + assert result._nodes is not None + assert result._edges is not None + + def test_convert_invalid_json(self): + """Test error on invalid JSON bytes""" + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + with pytest.raises(SentinelGraphQueryError, match="parse.*JSON"): + g._parse_graph_response(b'not valid json') + + def test_convert_missing_result_key(self): + """Old response format without 'result' key raises clear error""" + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + old_format = {"Graph": {"Nodes": [], "Edges": []}, "RawData": {"Rows": []}} + with pytest.raises(SentinelGraphQueryError, match="result"): + g._parse_graph_response(old_format) + + def test_convert_empty_response(self): + """Test conversion of empty response""" + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="TestInstance") + + result = g._parse_graph_response(SAMPLE_RESPONSE_EMPTY) + + assert len(result._nodes) == 0 + assert len(result._edges) == 0 + + +class TestSentinelGraphList: + """Tests for sentinel_graph_list() method""" + + @patch('graphistry.plugins.sentinel_graph.requests.get') + @patch.object(SentinelGraphMixin, '_get_auth_token') + def 
test_list_returns_dataframe(self, mock_auth, mock_get): + """sentinel_graph_list returns a DataFrame with graph instance metadata""" + mock_auth.return_value = "test-token" + mock_get.return_value = Mock( + status_code=200, + content=json.dumps(get_graph_list_response()).encode() + ) + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="placeholder") + result = g.sentinel_graph_list() + + assert isinstance(result, pd.DataFrame) + assert len(result) == 2 + assert "name" in result.columns + assert "instanceStatus" in result.columns + assert result.iloc[0]["name"] == "TestGraph" + assert result.iloc[0]["instanceStatus"] == "Ready" + assert result.iloc[1]["name"] == "StagingGraph" + assert result.iloc[1]["instanceStatus"] == "Creating" + + @patch('graphistry.plugins.sentinel_graph.requests.get') + @patch.object(SentinelGraphMixin, '_get_auth_token') + def test_list_uses_correct_url_and_params(self, mock_auth, mock_get): + """List endpoint uses correct URL and graphTypes=Custom query param""" + mock_auth.return_value = "test-token" + mock_get.return_value = Mock( + status_code=200, + content=json.dumps({"value": []}).encode() + ) + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="placeholder") + g.sentinel_graph_list() + + call_args = mock_get.call_args + url = call_args[0][0] + params = call_args[1]['params'] + assert url.startswith("https://api.securityplatform.microsoft.com/") + assert "/graphs/graph-instances" in url + assert params == {"graphTypes": "Custom"} + + @patch('graphistry.plugins.sentinel_graph.requests.get') + @patch.object(SentinelGraphMixin, '_get_auth_token') + def test_list_empty_returns_empty_dataframe(self, mock_auth, mock_get): + """Empty list returns DataFrame with expected columns""" + mock_auth.return_value = "test-token" + mock_get.return_value = Mock( + status_code=200, + content=json.dumps({"value": []}).encode() + ) + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="placeholder") + result = 
g.sentinel_graph_list() + + assert isinstance(result, pd.DataFrame) + assert len(result) == 0 + assert set(result.columns) >= {"name", "graphDefinitionName", "instanceStatus"} + + @patch('graphistry.plugins.sentinel_graph.requests.get') + @patch.object(SentinelGraphMixin, '_get_auth_token') + def test_list_http_error_raises(self, mock_auth, mock_get): + """Non-200 HTTP response raises SentinelGraphQueryError""" + mock_auth.return_value = "test-token" + mock_get.return_value = Mock(status_code=403) + + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="placeholder") + + with pytest.raises(SentinelGraphQueryError, match="403"): + g.sentinel_graph_list() + + @patch('graphistry.plugins.sentinel_graph.requests.get') + @patch.object(SentinelGraphMixin, '_get_auth_token') + def test_list_uses_bearer_token(self, mock_auth, mock_get): + """List endpoint sends correct Authorization header""" + mock_auth.return_value = "my-bearer-token" + mock_get.return_value = Mock( + status_code=200, + content=json.dumps({"value": []}).encode() + ) + g = graphistry.bind() + g.configure_sentinel_graph(graph_instance="placeholder") + g.sentinel_graph_list() + + call_kwargs = mock_get.call_args[1] + assert call_kwargs['headers']['Authorization'] == "Bearer my-bearer-token" + + +# Integration test markers +@pytest.mark.integration +@pytest.mark.skipif(True, reason="Requires live API credentials") +class TestSentinelGraphIntegration: + """Integration tests requiring live API access""" + + def test_live_query(self): + """Test actual query against live API (requires credentials)""" + pass diff --git a/mypy.ini b/mypy.ini index 4b2ec06e8d..ff6909dc20 100644 --- a/mypy.ini +++ b/mypy.ini @@ -115,6 +115,12 @@ ignore_missing_imports = True [mypy-azure.kusto.*] ignore_missing_imports = True +[mypy-azure.core.*] +ignore_missing_imports = True + +[mypy-azure.identity] +ignore_missing_imports = True + [mypy-requests.*] ignore_missing_imports = True diff --git a/setup.py b/setup.py index 
a6c6cc50ba..8a3eb3b4dc 100755 --- a/setup.py +++ b/setup.py @@ -57,6 +57,7 @@ def unique_flatten_dict(d): 'jupyter': ['ipython'], 'spanner': ['google-cloud-spanner'], 'kusto': ['azure-kusto-data', 'azure-identity'], + 'sentinel-graph': ['azure-identity'], 'polars': ['polars'], }