-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathinDegrees.py
More file actions
71 lines (56 loc) · 2.47 KB
/
inDegrees.py
File metadata and controls
71 lines (56 loc) · 2.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import pandas as pd
def in_degree_by_relationship_type(driver, label: str) -> pd.DataFrame:
    """
    Compute the in-degree, grouped by relationship type, of every labelled node.

    Two Cypher queries are run in one session: one listing the ids of all
    nodes carrying ``label``, one counting incoming relationships per
    (node, relationship type). The counts are pivoted so that each
    relationship type becomes a column.

    Parameters
    ----------
    driver : neo4j.Driver
        An existing Neo4j driver
        (e.g. ``from neo4j import GraphDatabase; GraphDatabase.driver(...)``).
        Only ``driver.session()`` and ``session.run(query).data()`` are used.
    label : str
        Node label to target (e.g. ``"City"``).

    Returns
    -------
    pandas.DataFrame
        - first column: ``nodeId`` (value of Cypher ``id(n)``), dtype ``Int64``;
        - subsequent columns: one per relationship type, sorted
          alphabetically, holding integer counts.

    Raises
    ------
    ValueError
        If ``label`` is empty.

    Notes
    -----
    - Only incoming relationships are counted: ``()-[r]->(n:Label)``.
    - Nodes with no incoming relationships are included with zeros across
      all relationship columns.
    - If no node has the label, an empty frame with only ``nodeId`` is
      returned; if no labelled node has any incoming relationship, the frame
      has only the ``nodeId`` column (there are no relationship types to
      create columns for).
    """
    if not label:
        raise ValueError("label must be a non-empty string")

    # A label cannot be supplied as a query parameter, so it is embedded in
    # the query text as a backtick-quoted identifier. Backticks inside the
    # label are doubled; merely wrapping the label in backticks would let a
    # label containing a backtick terminate the identifier and inject Cypher.
    # NOTE(review): some older Neo4j versions are reported to also treat the
    # literal text \u0060 as a backtick -- confirm against the server version.
    label_bt = "`" + str(label).replace("`", "``") + "`"

    q_nodes = f"""
    MATCH (n:{label_bt})
    RETURN id(n) AS nodeId
    """
    q_counts = f"""
    MATCH ()-[r]->(n:{label_bt})
    RETURN id(n) AS nodeId, type(r) AS relType, count(r) AS cnt
    """

    with driver.session() as session:
        # All target node ids
        nodes_df = pd.DataFrame(session.run(q_nodes).data())
        if nodes_df.empty:
            # No nodes of that label: skip the second query and return an
            # empty frame with nodeId only.
            return pd.DataFrame(columns=["nodeId"]).astype({"nodeId": "Int64"})

        # In-degree counts grouped by relationship type
        count_rows = session.run(q_counts).data()

    # Explicit columns keep the frame well-formed even when there are no rows.
    counts_df = pd.DataFrame(count_rows, columns=["nodeId", "relType", "cnt"])
    if counts_df.empty:
        # No incoming relationships at all: there are no relationship
        # columns to add, so the result is just the node ids.
        return nodes_df.astype({"nodeId": "Int64"})

    # Pivot so each relationship type is its own column
    pivot = counts_df.pivot_table(
        index="nodeId", columns="relType", values="cnt", aggfunc="sum", fill_value=0
    ).reset_index()

    # Left-merge from the full node list so nodes without incoming edges are
    # kept; their missing counts become NaN and are then filled with 0.
    df = nodes_df.merge(pivot, on="nodeId", how="left").fillna(0)

    # nodeId first, then relationship types in alphabetical order
    rel_cols = sorted(c for c in df.columns if c != "nodeId")

    # The merge/fillna step leaves counts as floats; cast them back to int.
    dtypes = {"nodeId": "Int64", **{c: int for c in rel_cols}}
    return df[["nodeId", *rel_cols]].astype(dtypes)