From e96731770eefaf8c5563cb5d1a7fd4431e0750d9 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Tue, 15 Oct 2019 09:03:34 -0400 Subject: [PATCH 01/37] internal/graph: work in progress on partition algorithm Signed-off-by: Colton McCurdy --- go.mod | 3 +- go.sum | 7 +++++ internal/graph/graph.go | 69 +++++++++++++++++++++++++++++++++++------ 3 files changed, 69 insertions(+), 10 deletions(-) diff --git a/go.mod b/go.mod index ffe1d7c..b71e974 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( github.com/google/go-cmp v0.3.1 github.com/pkg/errors v0.8.1 github.com/stretchr/testify v1.3.0 - golang.org/x/tools v0.0.0-20190826060629-95c3470cfb70 + golang.org/x/tools v0.0.0-20190918214516-5a1a30219888 + golang.org/x/tools/gopls v0.1.7 // indirect honnef.co/go/tools v0.0.1-2019.2.3 ) diff --git a/go.sum b/go.sum index a6c1a87..c1d8609 100644 --- a/go.sum +++ b/go.sum @@ -26,6 +26,7 @@ golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKG golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/sync v0.0.0-20190423024810-112230192c58 h1:8gQV6CLnAEikrhgkHFbMAEhagSSnXWGV915qUMm9mrU= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -34,6 +35,12 @@ golang.org/x/tools v0.0.0-20190425150028-36563e24a262/go.mod h1:RgjU9mgBXZiqYHBn golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20190826060629-95c3470cfb70 h1:Cde4MUY6o3RgxpWu8/7xCjEW7SE22me73ix+NIXtV7s= golang.org/x/tools v0.0.0-20190826060629-95c3470cfb70/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20190918214516-5a1a30219888 h1:ER45Jz0UDQ3e6em1lwXVwuPf96lvyQogb7m+gEbsoPg= +golang.org/x/tools v0.0.0-20190918214516-5a1a30219888/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191014205221-18e3458ac98b h1:EsQHTYgcM562dq02r6y2Yt9VpvvLNIyNECx96XQeolA= +golang.org/x/tools/gopls v0.1.7 h1:YwKf8t9h69++qCtVmc2q6fVuetFXmmu9LKoPMYLZid4= +golang.org/x/tools/gopls v0.1.7/go.mod h1:PE3vTwT0ejw3a2L2fFgSJkxlEbA8Slbk+Lsy9hTmbG8= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7 h1:9zdDQZ7Thm29KFXgAX/+yaf3eVbP7djjWp/dXAppNCc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= diff --git a/internal/graph/graph.go b/internal/graph/graph.go index f5d947f..d52adee 100644 --- a/internal/graph/graph.go +++ b/internal/graph/graph.go @@ -1,9 +1,29 @@ package graph import ( + "container/list" + "github.com/pkg/errors" ) +// Weights defines the configurable edge weights +type Weights struct { + // TypeField defines the edge weight of a type that has a field of another type. + TypeField float64 + // TypeMethod defines the edge weight of a type that is a func/method receiver. + TypeMethod float64 + // VarOfType defines the edge weight between a variable and the variable's type. + VarOfType float64 + // ConstOfType defines the edge weight between a constant and the constant's type. + ConstOfType float64 + // FuncParam defines the edge weight between a function parameter and the parameter's type. + FuncParam float64 + // FuncReturn defines the edge weight between a function return value and the value's type. + FuncReturn float64 + // FuncBody defines the edge weight between a value used in a function body and the value's type. + FuncBody float64 +} + // Graph is a map of node IDs to the Node with that ID. type Graph map[string]*Node @@ -30,17 +50,48 @@ func (g Graph) ContainsNode(id string) bool { } // Partition returns a slice of nodes that should be split from a given source graph. -// -// TODO (Issue#8): actually add logic to properly partition -func (g Graph) Partition() []*Node { - res := make([]*Node, 0, len(g)) - - // TODO: for now, just return every node - for _, v := range g { - res = append(res, v) +// thoughts: +// 1. we dont necessarily know how the graph is structured; we do know the nodes +// a. this is why we have to traverse from every node as the root +// 2. we need the distance between every pair of _connected_ nodes +// a. we have all of the edge weights +// b. we dont yet know about connectedness (we'll find that out here when we traverse the graph) +// 3. calculate distance of every root -> leaf +// a. return paths and their weights +func (g Graph) Partition(epsilon float64) [][]Node { + visited := make(map[string]map[string][]float64) + + for _, root := range g { + calculateDistance := func(a, b *Node, m map[string]map[string][]float64) { + m[a.ID][b.ID] = append(m[a.ID][b.ID], .Weight) + } + + bfs(root, visited) + } +} + +func bfs(root *Node, visited map[string]map[string][]float64, fn func(*Node, *Node, map[string]map[string][]float64)) { + queue := list.New() + visited[root.ID] = make(map[string][]float64) // paths [root][edge] and their distances + + for _, edge := range root.Edges { + if _, ok := visited[edge.Dest.ID]; ok { + fn(root, edge.Dest, visited) + continue + } + + queue.PushBack(edge.Dest) } - return res + for queue.Len() > 0 { + e := queue.Front() + n, ok := e.Value.(*Node) + if !ok { + continue + } + + bfs(n, visited, fn) + } } func (g Graph) shouldPartition() bool { From f75e31bbb1397d2b21c89aa5f27ab582fd247c54 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Wed, 16 Oct 2019 08:36:47 -0400 Subject: [PATCH 02/37] [WIP] internal/graph: working on recursiveBFS and the calculation of connectedness Signed-off-by: Colton McCurdy --- internal/graph/graph.go | 29 ++++++++++++++++++++--------- internal/graph/node.go | 16 ++++++++++------ 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/internal/graph/graph.go b/internal/graph/graph.go index d52adee..ee101bd 100644 --- a/internal/graph/graph.go +++ b/internal/graph/graph.go @@ -59,24 +59,27 @@ func (g Graph) ContainsNode(id string) bool { // 3. calculate distance of every root -> leaf // a. return paths and their weights func (g Graph) Partition(epsilon float64) [][]Node { - visited := make(map[string]map[string][]float64) for _, root := range g { - calculateDistance := func(a, b *Node, m map[string]map[string][]float64) { - m[a.ID][b.ID] = append(m[a.ID][b.ID], .Weight) - } + visited := make(map[string]bool) + recursiveBFS(root, visited, appendPaths) - bfs(root, visited) + // iterate through shortest paths and calculate connectedness and then distance + // sum connectedness } + + // TODO: calculate distance + // n.Distance = calculateDistance() + // 1 over the sum of connectedness } -func bfs(root *Node, visited map[string]map[string][]float64, fn func(*Node, *Node, map[string]map[string][]float64)) { +func recursiveBFS(root *Node, visited map[string]bool, fn func(a, b *Node)) { queue := list.New() - visited[root.ID] = make(map[string][]float64) // paths [root][edge] and their distances + visited[root.ID] = true for _, edge := range root.Edges { if _, ok := visited[edge.Dest.ID]; ok { - fn(root, edge.Dest, visited) + fn(root, edge.Dest) continue } @@ -90,7 +93,15 @@ func bfs(root *Node, visited map[string]map[string][]float64, fn func(*Node, *No continue } - bfs(n, visited, fn) + recursiveBFS(n, visited, fn) + } +} + +// appendPaths appends that paths of the edge to the paths of the root. +func appendPaths(root, edge *Node) { + for _, path := range edge.Paths { + path = append([]*Node{root}, path...) // add the new root to the beginning + root.Paths = append(root.Paths, path) } } diff --git a/internal/graph/node.go b/internal/graph/node.go index cca2964..3a53b57 100644 --- a/internal/graph/node.go +++ b/internal/graph/node.go @@ -15,17 +15,21 @@ type WeightedEdge struct { // Node has an ID and a map of WeightedEdges or weighted relationships to other // Nodes. type Node struct { - ID string - Object types.Object - Edges map[string]WeightedEdge + ID string + Object types.Object + Edges map[string]WeightedEdge + Paths [][]*Node + // ConnectednessStrength is the current "strongest" or highest shortest path distance + ConnectednessStrength float64 } // NewNode creates a pointer to a new Node with ID, id, and initializes a map of Edges. func NewNode(id string, obj types.Object) *Node { return &Node{ - ID: id, - Object: obj, - Edges: make(map[string]WeightedEdge), + ID: id, + Object: obj, + Edges: make(map[string]WeightedEdge), + Paths: make([][]*Node, 0), // going to dynamically resize } } From 037f784cc04fd85fb55cf3158acd8ed886df5c59 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Thu, 17 Oct 2019 08:58:16 -0400 Subject: [PATCH 03/37] [WIP] internal/graph: working on calculating edge distances Signed-off-by: Colton McCurdy --- internal/graph/graph.go | 80 +++++++++++++++++++++++++++++------------ internal/graph/node.go | 32 +++++++++++------ 2 files changed, 80 insertions(+), 32 deletions(-) diff --git a/internal/graph/graph.go b/internal/graph/graph.go index ee101bd..ab2d62f 100644 --- a/internal/graph/graph.go +++ b/internal/graph/graph.go @@ -49,6 +49,19 @@ func (g Graph) ContainsNode(id string) bool { return ok } +// FindRoots finds the many possible roots in the graph. +func (g Graph) FindRoots() []*Node { + roots := make([]*Node, 0, len(g)) + + for _, node := range g { + if len(node.Parents) == 0 { + roots = append(roots, node) + } + } + + return roots +} + // Partition returns a slice of nodes that should be split from a given source graph. // thoughts: // 1. we dont necessarily know how the graph is structured; we do know the nodes @@ -59,27 +72,57 @@ func (g Graph) ContainsNode(id string) bool { // 3. calculate distance of every root -> leaf // a. return paths and their weights func (g Graph) Partition(epsilon float64) [][]Node { + roots := g.FindRoots() - for _, root := range g { + for _, root := range roots { visited := make(map[string]bool) - recursiveBFS(root, visited, appendPaths) + recursiveBFS(root, visited) - // iterate through shortest paths and calculate connectedness and then distance - // sum connectedness + for _, node := range g { + node.StrongestShortestPathLens = append(node.StrongestShortestPathLens, node.StrongestShortestPathLen) + } } - // TODO: calculate distance - // n.Distance = calculateDistance() - // 1 over the sum of connectedness + g.calculateDistances() + g.identifyPartitions(epsilon) + } -func recursiveBFS(root *Node, visited map[string]bool, fn func(a, b *Node)) { +func (g Graph) identifyPartitions(epsilon float64) { + // TODO: partitions should be edges, not nodes + + partitions := make([]*Node) + for _, node := range g { + if node.Distance > epsilon { + node.Partition = true + } + } +} + +// TODO: distances should be on edges, not nodes +func (g Graph) calculateDistances() { + for _, node := range g { + var sum float64 + for _, sp := range node.StrongestShortestPathLens { + sum += sp + } + + node.Distance = 1 / (sum) + } +} + +func recursiveBFS(node *Node, visited map[string]bool) { queue := list.New() - visited[root.ID] = true + visited[node.ID] = true - for _, edge := range root.Edges { + for _, edge := range node.Edges { if _, ok := visited[edge.Dest.ID]; ok { - fn(root, edge.Dest) + + p := node.StrongestShortestPathLen + edge.Weight + if edge.Dest.StrongestShortestPathLen < p { + edge.Dest.StrongestShortestPathLen = p + } + continue } @@ -87,21 +130,14 @@ func recursiveBFS(root *Node, visited map[string]bool, fn func(a, b *Node)) { } for queue.Len() > 0 { - e := queue.Front() - n, ok := e.Value.(*Node) + n := queue.Front() + e, ok := n.Value.(*Node) if !ok { continue } - recursiveBFS(n, visited, fn) - } -} - -// appendPaths appends that paths of the edge to the paths of the root. -func appendPaths(root, edge *Node) { - for _, path := range edge.Paths { - path = append([]*Node{root}, path...) // add the new root to the beginning - root.Paths = append(root.Paths, path) + e.StrongestShortestPathLen = node.StrongestShortestPathLen + node.Edges[e.ID].Weight + recursiveBFS(e, visited) } } diff --git a/internal/graph/node.go b/internal/graph/node.go index 3a53b57..245b6e9 100644 --- a/internal/graph/node.go +++ b/internal/graph/node.go @@ -5,6 +5,10 @@ import ( "go/types" ) +const ( + defaultConnectednessValue = -1 // default to an invalid, "infinity", value. +) + // WeightedEdge contains pointers to source and destination Nodes as well as // the assigned weight of the relationship. type WeightedEdge struct { @@ -15,25 +19,28 @@ type WeightedEdge struct { // Node has an ID and a map of WeightedEdges or weighted relationships to other // Nodes. type Node struct { - ID string - Object types.Object - Edges map[string]WeightedEdge - Paths [][]*Node - // ConnectednessStrength is the current "strongest" or highest shortest path distance - ConnectednessStrength float64 + ID string + Object types.Object + Edges map[string]WeightedEdge + Parents map[string]WeightedEdge + StrongestShortestPathLen float64 // used for a single graph traversals + + StrongestShortestPathLens []float64 // used across multiple graph traversals + Distance float64 } // NewNode creates a pointer to a new Node with ID, id, and initializes a map of Edges. func NewNode(id string, obj types.Object) *Node { return &Node{ - ID: id, - Object: obj, - Edges: make(map[string]WeightedEdge), - Paths: make([][]*Node, 0), // going to dynamically resize + ID: id, + Object: obj, + Edges: make(map[string]WeightedEdge), + Parents: make(map[string]WeightedEdge), } } // AddEdges adds weighted edges to a source node that signify relationships with other nodes. +// Also adds parents to the destination node. func (n *Node) AddEdge(dest *Node, w float64) { // prevent edge between node and itself if n.ID == dest.ID { @@ -44,6 +51,11 @@ func (n *Node) AddEdge(dest *Node, w float64) { Weight: w, Dest: dest, } + + dest.Parents[n.ID] = WeightedEdge{ + Weight: w, + Dest: n, + } } // ContainsEdge returns whether or not the graph contains an edge from source to dest. From a869368736a5ef36f4ba22cc1ffa138693951df9 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Fri, 18 Oct 2019 09:14:59 -0400 Subject: [PATCH 04/37] [WIP] internal/graph: graph traversal is done. working on calculating distance Signed-off-by: Colton McCurdy --- internal/graph/graph.go | 69 +++++++++++++++++++++++++++-------------- internal/graph/node.go | 25 +++++++++------ 2 files changed, 61 insertions(+), 33 deletions(-) diff --git a/internal/graph/graph.go b/internal/graph/graph.go index ab2d62f..586a5c5 100644 --- a/internal/graph/graph.go +++ b/internal/graph/graph.go @@ -74,40 +74,45 @@ func (g Graph) FindRoots() []*Node { func (g Graph) Partition(epsilon float64) [][]Node { roots := g.FindRoots() + edges := make([]WeightedEdge, 0) + for _, node := range g { + for _, edge := range node.Edges { + edges = append(edges, edge) + } + } + for _, root := range roots { visited := make(map[string]bool) recursiveBFS(root, visited) - for _, node := range g { - node.StrongestShortestPathLens = append(node.StrongestShortestPathLens, node.StrongestShortestPathLen) + for _, edge := range edges { + // skip if the node is the root because we don't want to add '0' to the slice of + // seen min paths. This could lead to division by zero. + if edge.Source == root { + continue + } + edge.MinPathStrengths = append(edge.MinPathStrengths, edge.Source.MinPathStrength) } } - g.calculateDistances() - g.identifyPartitions(epsilon) - + g.calculateDistances(edges) + g.identifyPartitions(edges, epsilon) } func (g Graph) identifyPartitions(epsilon float64) { - // TODO: partitions should be edges, not nodes - - partitions := make([]*Node) - for _, node := range g { - if node.Distance > epsilon { - node.Partition = true - } - } } -// TODO: distances should be on edges, not nodes func (g Graph) calculateDistances() { for _, node := range g { - var sum float64 - for _, sp := range node.StrongestShortestPathLens { - sum += sp - } + for _, edge := range node.Edges { - node.Distance = 1 / (sum) + var sum float64 + for _, mps := range edge.MinPathStrengths { + sum += mps + } + + edge.Distance = 1 / (sum) + } } } @@ -117,10 +122,26 @@ func recursiveBFS(node *Node, visited map[string]bool) { for _, edge := range node.Edges { if _, ok := visited[edge.Dest.ID]; ok { + var updateChildren bool + + if edge.Weight < edge.Dest.MinPathStrength { + edge.Dest.MinPathStrength = edge.Weight + updateChildren = true + } - p := node.StrongestShortestPathLen + edge.Weight - if edge.Dest.StrongestShortestPathLen < p { - edge.Dest.StrongestShortestPathLen = p + if node.MinPathStrength < edge.Dest.MinPathStrength { + edge.Dest.MinPathStrength = node.ShortestPathLen + updateChildren = true + } + + pLen := node.ShortestPathLen + edge.Weight + if pLen < edge.Dest.ShortestPathLen { + edge.Dest.ShortestPathLen = pLen + updateChildren = true + } + + if updateChildren { + queue.PushBack(edge.Dest) } continue @@ -136,7 +157,9 @@ func recursiveBFS(node *Node, visited map[string]bool) { continue } - e.StrongestShortestPathLen = node.StrongestShortestPathLen + node.Edges[e.ID].Weight + e.ShortestPathLen = node.ShortestPathLen + node.Edges[e.ID].Weight + e.MinPathStrength = node.Edges[e.ID].Weight + recursiveBFS(e, visited) } } diff --git a/internal/graph/node.go b/internal/graph/node.go index 245b6e9..245bb94 100644 --- a/internal/graph/node.go +++ b/internal/graph/node.go @@ -12,21 +12,26 @@ const ( // WeightedEdge contains pointers to source and destination Nodes as well as // the assigned weight of the relationship. type WeightedEdge struct { - Weight float64 - Dest *Node + Weight float64 + Source, Dest *Node + + ConnectednessStrength float64 + MinPathStrengths []float64 // used across multiple graph traversals + + Partition bool + Distance float64 } // Node has an ID and a map of WeightedEdges or weighted relationships to other // Nodes. type Node struct { - ID string - Object types.Object - Edges map[string]WeightedEdge - Parents map[string]WeightedEdge - StrongestShortestPathLen float64 // used for a single graph traversals - - StrongestShortestPathLens []float64 // used across multiple graph traversals - Distance float64 + ID string + Object types.Object + Edges map[string]WeightedEdge + Parents map[string]WeightedEdge // TODO: may be able to delete this now + + MinPathStrength float64 + ShortestPathLen float64 // used for a single graph traversals } // NewNode creates a pointer to a new Node with ID, id, and initializes a map of Edges. From 01264a6da49eca60132ef117fd8d4b0cbcc1be2d Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Mon, 21 Oct 2019 08:53:31 -0400 Subject: [PATCH 05/37] internal/graph: first full draft complete. thinking about summation (global) vs just min (local) of connectedness value Signed-off-by: Colton McCurdy --- internal/graph/graph.go | 76 ++++++++++++++++++++++++++++------------- 1 file changed, 52 insertions(+), 24 deletions(-) diff --git a/internal/graph/graph.go b/internal/graph/graph.go index 586a5c5..f81c83c 100644 --- a/internal/graph/graph.go +++ b/internal/graph/graph.go @@ -62,16 +62,16 @@ func (g Graph) FindRoots() []*Node { return roots } -// Partition returns a slice of nodes that should be split from a given source graph. -// thoughts: -// 1. we dont necessarily know how the graph is structured; we do know the nodes -// a. this is why we have to traverse from every node as the root -// 2. we need the distance between every pair of _connected_ nodes -// a. we have all of the edge weights -// b. we dont yet know about connectedness (we'll find that out here when we traverse the graph) -// 3. calculate distance of every root -> leaf -// a. return paths and their weights -func (g Graph) Partition(epsilon float64) [][]Node { +// Partition returns a slice of WeightedEdges that should be partitioned (i.e., "broken"). +// The pseudo-algorithm for Partition is as follows: +// 1. find root nodes +// 2. find all edges in the graph +// 3. for each root +// a. recursively traverse the graph to identify the min shortest path to get to a node +// b. for each edge in the graph, store the min shortest path to a node from the root under observation +// 4. calculate edge distances +// 5. identify edges where the sum of cross-iteration distances > epsilon +func (g Graph) Partition(epsilon float64) []WeightedEdge { roots := g.FindRoots() edges := make([]WeightedEdge, 0) @@ -95,27 +95,55 @@ func (g Graph) Partition(epsilon float64) [][]Node { } } - g.calculateDistances(edges) - g.identifyPartitions(edges, epsilon) + dists := g.calculateDistances(edges) + return g.identifyPartitions(dists, epsilon) } -func (g Graph) identifyPartitions(epsilon float64) { -} +// calculateDistances calculates the strongest strong distance of an edge from +// every possible root in the graph. +// +// The strongest strong distance is defined as: +// * 1 / CONN(u, v) +// * where CONN(u, v) denotes the strength of connectedness of a pair of vertices. +// * CONN(u, v) is defined as the MAX{s(P)}, where s(P) denotes the MIN edge weight in a path. +// * CONN(u, v) can be summarized as the max, weakest edge between nodes of various paths. +// +// "Distances in Weighted Graphs" with authors Dhanyamol M V and Sunil Mathew. +// * http://www.researchmathsci.org/apamart/apam-v8n1-1.pdf +// +// In splitfile, we modify this equation slightly to get a more global understanding +// * 1 / SUM(CONN(u, v)) +// * where we are summing the CONN(u, v) from different roots +// +// If you are interested in my notes/highlights, check out this Tweet +// * https://twitter.com/McCurdyColton/status/1179024173664002049?s=20 +func (g Graph) calculateDistances(edges []WeightedEdge) []WeightedEdge { + res := make([]WeightedEdge, 0, len(edges)) + + for _, edge := range edges { + + var sum float64 + for _, mps := range edge.MinPathStrengths { + sum += mps + } -func (g Graph) calculateDistances() { - for _, node := range g { - for _, edge := range node.Edges { + edge.Distance = 1 / (sum) + res = append(res, edge) + } - var sum float64 - for _, mps := range edge.MinPathStrengths { - sum += mps - } + return res +} - edge.Distance = 1 / (sum) - } - } +// identifyPartitions identifies the edges where the distance is greater than the +// configured epsilon value. These edges will be split/"broken". +func (g Graph) identifyPartitions(edges []WeightedEdge, epsilon float64) []WeightedEdge { + res := make([]WeightedEdge, 0, len(edges)) + + return res } +// recursiveBFS does recursive breadth-first search of the graph, keeping track +// of shortest path and the mininimum path strength (i.e., the weaked edge in a path). func recursiveBFS(node *Node, visited map[string]bool) { queue := list.New() visited[node.ID] = true From 7b79d8c35ce2b95c56f4eae3a29bfd12303e209f Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Tue, 22 Oct 2019 06:53:10 -0400 Subject: [PATCH 06/37] internal/graph: moving parition code to its own file and moving methods to funcs Signed-off-by: Colton McCurdy --- internal/graph/graph.go | 161 ------------------------------------ internal/graph/partition.go | 144 ++++++++++++++++++++++++++++++++ 2 files changed, 144 insertions(+), 161 deletions(-) create mode 100644 internal/graph/partition.go diff --git a/internal/graph/graph.go b/internal/graph/graph.go index f81c83c..203a347 100644 --- a/internal/graph/graph.go +++ b/internal/graph/graph.go @@ -1,29 +1,9 @@ package graph import ( - "container/list" - "github.com/pkg/errors" ) -// Weights defines the configurable edge weights -type Weights struct { - // TypeField defines the edge weight of a type that has a field of another type. - TypeField float64 - // TypeMethod defines the edge weight of a type that is a func/method receiver. - TypeMethod float64 - // VarOfType defines the edge weight between a variable and the variable's type. - VarOfType float64 - // ConstOfType defines the edge weight between a constant and the constant's type. - ConstOfType float64 - // FuncParam defines the edge weight between a function parameter and the parameter's type. - FuncParam float64 - // FuncReturn defines the edge weight between a function return value and the value's type. - FuncReturn float64 - // FuncBody defines the edge weight between a value used in a function body and the value's type. - FuncBody float64 -} - // Graph is a map of node IDs to the Node with that ID. type Graph map[string]*Node @@ -61,144 +41,3 @@ func (g Graph) FindRoots() []*Node { return roots } - -// Partition returns a slice of WeightedEdges that should be partitioned (i.e., "broken"). -// The pseudo-algorithm for Partition is as follows: -// 1. find root nodes -// 2. find all edges in the graph -// 3. for each root -// a. recursively traverse the graph to identify the min shortest path to get to a node -// b. for each edge in the graph, store the min shortest path to a node from the root under observation -// 4. calculate edge distances -// 5. identify edges where the sum of cross-iteration distances > epsilon -func (g Graph) Partition(epsilon float64) []WeightedEdge { - roots := g.FindRoots() - - edges := make([]WeightedEdge, 0) - for _, node := range g { - for _, edge := range node.Edges { - edges = append(edges, edge) - } - } - - for _, root := range roots { - visited := make(map[string]bool) - recursiveBFS(root, visited) - - for _, edge := range edges { - // skip if the node is the root because we don't want to add '0' to the slice of - // seen min paths. This could lead to division by zero. - if edge.Source == root { - continue - } - edge.MinPathStrengths = append(edge.MinPathStrengths, edge.Source.MinPathStrength) - } - } - - dists := g.calculateDistances(edges) - return g.identifyPartitions(dists, epsilon) -} - -// calculateDistances calculates the strongest strong distance of an edge from -// every possible root in the graph. -// -// The strongest strong distance is defined as: -// * 1 / CONN(u, v) -// * where CONN(u, v) denotes the strength of connectedness of a pair of vertices. -// * CONN(u, v) is defined as the MAX{s(P)}, where s(P) denotes the MIN edge weight in a path. -// * CONN(u, v) can be summarized as the max, weakest edge between nodes of various paths. -// -// "Distances in Weighted Graphs" with authors Dhanyamol M V and Sunil Mathew. -// * http://www.researchmathsci.org/apamart/apam-v8n1-1.pdf -// -// In splitfile, we modify this equation slightly to get a more global understanding -// * 1 / SUM(CONN(u, v)) -// * where we are summing the CONN(u, v) from different roots -// -// If you are interested in my notes/highlights, check out this Tweet -// * https://twitter.com/McCurdyColton/status/1179024173664002049?s=20 -func (g Graph) calculateDistances(edges []WeightedEdge) []WeightedEdge { - res := make([]WeightedEdge, 0, len(edges)) - - for _, edge := range edges { - - var sum float64 - for _, mps := range edge.MinPathStrengths { - sum += mps - } - - edge.Distance = 1 / (sum) - res = append(res, edge) - } - - return res -} - -// identifyPartitions identifies the edges where the distance is greater than the -// configured epsilon value. These edges will be split/"broken". -func (g Graph) identifyPartitions(edges []WeightedEdge, epsilon float64) []WeightedEdge { - res := make([]WeightedEdge, 0, len(edges)) - - return res -} - -// recursiveBFS does recursive breadth-first search of the graph, keeping track -// of shortest path and the mininimum path strength (i.e., the weaked edge in a path). -func recursiveBFS(node *Node, visited map[string]bool) { - queue := list.New() - visited[node.ID] = true - - for _, edge := range node.Edges { - if _, ok := visited[edge.Dest.ID]; ok { - var updateChildren bool - - if edge.Weight < edge.Dest.MinPathStrength { - edge.Dest.MinPathStrength = edge.Weight - updateChildren = true - } - - if node.MinPathStrength < edge.Dest.MinPathStrength { - edge.Dest.MinPathStrength = node.ShortestPathLen - updateChildren = true - } - - pLen := node.ShortestPathLen + edge.Weight - if pLen < edge.Dest.ShortestPathLen { - edge.Dest.ShortestPathLen = pLen - updateChildren = true - } - - if updateChildren { - queue.PushBack(edge.Dest) - } - - continue - } - - queue.PushBack(edge.Dest) - } - - for queue.Len() > 0 { - n := queue.Front() - e, ok := n.Value.(*Node) - if !ok { - continue - } - - e.ShortestPathLen = node.ShortestPathLen + node.Edges[e.ID].Weight - e.MinPathStrength = node.Edges[e.ID].Weight - - recursiveBFS(e, visited) - } -} - -func (g Graph) shouldPartition() bool { - if len(g) <= 1 { - return false - } - - // TODO: thought; maybe eventually we store meta data about the graph - // (e.g., types of edges such as method and if there are zero methods, maybe we consider not partitioning) - - return true -} diff --git a/internal/graph/partition.go b/internal/graph/partition.go new file mode 100644 index 0000000..ed5533a --- /dev/null +++ b/internal/graph/partition.go @@ -0,0 +1,144 @@ +package graph + +import "container/list" + +func shouldPartition(g Graph) bool { + if len(g) <= 1 { + return false + } + + // TODO: thought; maybe eventually we store meta data about the graph + // (e.g., types of edges such as method and if there are zero methods, maybe we consider not partitioning) + + return true +} + +// Partition returns a slice of WeightedEdges that should be partitioned (i.e., "broken"). +// The pseudo-algorithm for Partition is as follows: +// 1. find root nodes +// 2. find all edges in the graph +// 3. for each root +// a. recursively traverse the graph to identify the min shortest path to get to a node +// b. for each edge in the graph, store the min shortest path to a node from the root under observation +// 4. calculate edge distances +// 5. identify edges where the sum of cross-iteration distances > epsilon +func Partition(g Graph, epsilon float64) []WeightedEdge { + roots := g.FindRoots() + + edges := make([]WeightedEdge, 0) + for _, node := range g { + for _, edge := range node.Edges { + edges = append(edges, edge) + } + } + + for _, root := range roots { + visited := make(map[string]bool) + recursiveBFS(root, visited) + + for _, edge := range edges { + // skip if the node is the root because we don't want to add '0' to the slice of + // seen min paths. This could lead to division by zero. + if edge.Source == root { + continue + } + edge.MinPathStrengths = append(edge.MinPathStrengths, edge.Source.MinPathStrength) + } + } + + dists := calculateDistances(g, edges) + return identifyPartitions(g, dists, epsilon) +} + +// calculateDistances calculates the strongest strong distance of an edge from +// every possible root in the graph. +// +// The strongest strong distance is defined as: +// * 1 / CONN(u, v) +// * where CONN(u, v) denotes the strength of connectedness of a pair of vertices. +// * CONN(u, v) is defined as the MAX{s(P)}, where s(P) denotes the MIN edge weight in a path. +// * CONN(u, v) can be summarized as the max, weakest edge between nodes of various paths. +// +// "Distances in Weighted Graphs" with authors Dhanyamol M V and Sunil Mathew. +// * http://www.researchmathsci.org/apamart/apam-v8n1-1.pdf +// +// In splitfile, we modify this equation slightly to get a more global understanding +// * 1 / SUM(CONN(u, v)) +// * where we are summing the CONN(u, v) from different roots +// +// If you are interested in my notes/highlights, check out this Tweet +// * https://twitter.com/McCurdyColton/status/1179024173664002049?s=20 +func calculateDistances(g Graph, edges []WeightedEdge) []WeightedEdge { + res := make([]WeightedEdge, 0, len(edges)) + + for _, edge := range edges { + + var sum float64 + for _, mps := range edge.MinPathStrengths { + sum += mps + } + + edge.Distance = 1 / (sum) + res = append(res, edge) + } + + return res +} + +// identifyPartitions identifies the edges where the distance is greater than the +// configured epsilon value. These edges will be split/"broken". +func identifyPartitions(g Graph, edges []WeightedEdge, epsilon float64) []WeightedEdge { + res := make([]WeightedEdge, 0, len(edges)) + + return res +} + +// recursiveBFS does recursive breadth-first search of the graph, keeping track +// of shortest path and the mininimum path strength (i.e., the weaked edge in a path). +func recursiveBFS(node *Node, visited map[string]bool) { + queue := list.New() + visited[node.ID] = true + + for _, edge := range node.Edges { + if _, ok := visited[edge.Dest.ID]; ok { + var updateChildren bool + + if edge.Weight < edge.Dest.MinPathStrength { + edge.Dest.MinPathStrength = edge.Weight + updateChildren = true + } + + if node.MinPathStrength < edge.Dest.MinPathStrength { + edge.Dest.MinPathStrength = node.ShortestPathLen + updateChildren = true + } + + pLen := node.ShortestPathLen + edge.Weight + if pLen < edge.Dest.ShortestPathLen { + edge.Dest.ShortestPathLen = pLen + updateChildren = true + } + + if updateChildren { + queue.PushBack(edge.Dest) + } + + continue + } + + queue.PushBack(edge.Dest) + } + + for queue.Len() > 0 { + n := queue.Front() + e, ok := n.Value.(*Node) + if !ok { + continue + } + + e.ShortestPathLen = node.ShortestPathLen + node.Edges[e.ID].Weight + e.MinPathStrength = node.Edges[e.ID].Weight + + recursiveBFS(e, visited) + } +} From f783132b62701f7dd734be296c1b92330c4e069d Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Tue, 22 Oct 2019 06:53:32 -0400 Subject: [PATCH 07/37] splitfile: update to use the new Partition API Signed-off-by: Colton McCurdy --- splitfile.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/splitfile.go b/splitfile.go index 547f6f4..580b0a6 100644 --- a/splitfile.go +++ b/splitfile.go @@ -13,7 +13,7 @@ import ( var Analyzer = &analysis.Analyzer{ Name: "splitfile", - Doc: "checks for clean splits of files in packages based on objects and their relationships with other objects.", + Doc: "A static analysis that identifies partitions of declarations and their uses to improve the readability of Go packages.", Requires: []*analysis.Analyzer{inspect.Analyzer}, Run: run, } @@ -21,9 +21,9 @@ var Analyzer = &analysis.Analyzer{ func run(pass *analysis.Pass) (interface{}, error) { g := traverse(pass.TypesInfo.Defs) - nodes := g.Partition() // TODO (Issue#8): right now, this returns every single node in the graph - for _, n := range nodes { - pass.Reportf(n.Object.Pos(), "parition found - %+v", n) + edges := graph.Partition(g, 1.0) + for _, e := range edges { + pass.Reportf(e.Source.Object.Pos(), "parition found between -> %+v", e.Dest.Object.Pos()) } return nil, nil From 5bc520d4589aea6161bda6313b61c123306af5d5 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Wed, 23 Oct 2019 06:58:14 -0400 Subject: [PATCH 08/37] internal/graph: update partition identifier function to check epsilon against edge distance Signed-off-by: Colton McCurdy --- internal/graph/partition.go | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/internal/graph/partition.go b/internal/graph/partition.go index ed5533a..8b6836e 100644 --- a/internal/graph/partition.go +++ b/internal/graph/partition.go @@ -47,7 +47,7 @@ func Partition(g Graph, epsilon float64) []WeightedEdge { } dists := calculateDistances(g, edges) - return identifyPartitions(g, dists, epsilon) + return identifyPartitions(dists, epsilon) } // calculateDistances calculates the strongest strong distance of an edge from @@ -87,9 +87,15 @@ func calculateDistances(g Graph, edges []WeightedEdge) []WeightedEdge { // identifyPartitions identifies the edges where the distance is greater than the // configured epsilon value. These edges will be split/"broken". -func identifyPartitions(g Graph, edges []WeightedEdge, epsilon float64) []WeightedEdge { +func identifyPartitions(edges []WeightedEdge, epsilon float64) []WeightedEdge { res := make([]WeightedEdge, 0, len(edges)) + for _, e := range edges { + if e.Distance > epsilon { + res = append(res, e) + } + } + return res } From afd6b3886ee7da1e0a5d44689588fb18385f14cc Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Wed, 23 Oct 2019 06:58:49 -0400 Subject: [PATCH 09/37] [WIP] internal/graph: update graph tests to work with new API Signed-off-by: Colton McCurdy --- internal/graph/graph_test.go | 44 +++++++++++++++++++++++++++++++-- internal/graph/node_test.go | 48 +++++++++++++++++++++++++++++------- 2 files changed, 81 insertions(+), 11 deletions(-) diff --git a/internal/graph/graph_test.go b/internal/graph/graph_test.go index 38252d9..7e42bb4 100644 --- a/internal/graph/graph_test.go +++ b/internal/graph/graph_test.go @@ -31,6 +31,13 @@ func TestNew(t *testing.T) { } var ( + parentEdge = map[string]WeightedEdge{ + "parent": WeightedEdge{ + Weight: 1.0, + Dest: &Node{ID: "parent", Edges: make(map[string]WeightedEdge)}, + }, + } + edgeB = map[string]WeightedEdge{ "b": WeightedEdge{ Weight: 1.0, @@ -38,8 +45,9 @@ var ( }, } - nodeAWithSingleEdgeB = Node{ID: "a", Edges: edgeB} - nodeCWithSingleEdgeB = Node{ID: "c", Edges: edgeB} + nodeAWithSingleEdgeB = Node{ID: "a", Edges: edgeB} + nodeCWithSingleEdgeB = Node{ID: "c", Edges: edgeB} + nodeCWithEdgeBAndParent = Node{ID: "c", Edges: edgeB, Parents: parentEdge} ) func TestAddNode(t *testing.T) { @@ -153,3 +161,35 @@ func TestContainsNode(t *testing.T) { }) } } + +func TestFindRoots(t *testing.T) { + tests := []struct { + name string + graph Graph + want []*Node + }{ + { + name: "empty-graph", + graph: Graph(make(map[string]*Node)), + want: []*Node{}, + }, + + { + name: "one-root-node", + graph: Graph(map[string]*Node{ + "c": &nodeCWithEdgeBAndParent, + }), + want: []*Node{&Node{ID: "parent", Edges: make(map[string]WeightedEdge)}}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + got := test.graph.FindRoots() + + if diff := cmp.Diff(test.want, got); diff != "" { + t.Errorf("(%+v) FindRoots() mismatch (-want +got): \n%s", test.graph, diff) + } + }) + } +} diff --git a/internal/graph/node_test.go b/internal/graph/node_test.go index 63f7c69..2bb8c77 100644 --- a/internal/graph/node_test.go +++ b/internal/graph/node_test.go @@ -5,6 +5,26 @@ import ( "testing" ) +var ( + nodeA = Node{ + ID: "a", + Object: nil, + Edges: map[string]WeightedEdge{}, + Parents: map[string]WeightedEdge{}, + MinPathStrength: 1.0, + ShortestPathLen: 1.0, + } + + nodeB = Node{ + ID: "b", + Object: nil, + Edges: map[string]WeightedEdge{}, + Parents: map[string]WeightedEdge{}, + MinPathStrength: 1.0, + ShortestPathLen: 1.0, + } +) + func TestAddEdge(t *testing.T) { tests := []struct { name string @@ -15,21 +35,31 @@ func TestAddEdge(t *testing.T) { }{ { name: "add-edge-w1.0-to-empty-slice-should-return-edges-with-single-value", - node: &Node{"a", nil, map[string]WeightedEdge{}}, - dest: &Node{"b", nil, map[string]WeightedEdge{}}, + node: &nodeA, + dest: &nodeB, weight: 1.0, - want: Node{"a", nil, map[string]WeightedEdge{"b": WeightedEdge{ - Weight: 1.0, - Dest: &Node{ID: "b", Object: nil, Edges: map[string]WeightedEdge{}}, - }}}, + want: Node{ + ID: "a", + Object: nil, + Edges: map[string]WeightedEdge{ + "b": WeightedEdge{ + Weight: 1.0, + Source: &nodeA, + Dest: &nodeB, + }, + }, + Parents: map[string]WeightedEdge{}, + MinPathStrength: 1.0, + ShortestPathLen: 1.0, + }, }, { name: "add-edge-to-same-node-does-nothing", - node: &Node{"a", nil, map[string]WeightedEdge{}}, - dest: &Node{"a", nil, map[string]WeightedEdge{}}, + node: &nodeA, + dest: &nodeA, weight: 1.0, - want: Node{"a", nil, map[string]WeightedEdge{}}, + want: nodeA, }, } From 1ba49f1c6d86e31438923120cf7d08b004342980 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Wed, 23 Oct 2019 06:59:13 -0400 Subject: [PATCH 10/37] splitfile: update epsilon value 1 -> 0.1 Signed-off-by: Colton McCurdy --- splitfile.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/splitfile.go b/splitfile.go index 580b0a6..b492869 100644 --- a/splitfile.go +++ b/splitfile.go @@ -21,7 +21,7 @@ var Analyzer = &analysis.Analyzer{ func run(pass *analysis.Pass) (interface{}, error) { g := traverse(pass.TypesInfo.Defs) - edges := graph.Partition(g, 1.0) + edges := graph.Partition(g, 0.1) // TODO: make this epsilon value configurable (or at least find a reasonable default i.e., the "natural" value) for _, e := range edges { pass.Reportf(e.Source.Object.Pos(), "parition found between -> %+v", e.Dest.Object.Pos()) } From 2c24453025b7ab18e69dd371f60631c4b6335b14 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Wed, 23 Oct 2019 06:59:42 -0400 Subject: [PATCH 11/37] testdata/src/abc: update testdata to use example in the README Signed-off-by: Colton McCurdy --- testdata/src/abc/abc.go | 45 +++++++++-------------------------------- 1 file changed, 9 insertions(+), 36 deletions(-) diff --git a/testdata/src/abc/abc.go b/testdata/src/abc/abc.go index a0ac4fb..9d470f5 100644 --- a/testdata/src/abc/abc.go +++ b/testdata/src/abc/abc.go @@ -1,40 +1,13 @@ package abc -type Ta struct { - Tb Tb -} +type ( + A int + B int + C int +) -func (Ta) M1() {} -func (Ta) M2() {} -func Fa() Ta { - var ta Ta - return ta -} +func fabc(a A, b B, c C) {} -type Tb int - -func (Tb) M1() {} -func FCommon() { - var ta Ta - var tb Tb - - if ta.Tb == 0 || tb == 0 { - return - } - return -} - -func FCommonParams(ta Ta, tb Tb) { - return -} - -type tNonExported int - -func (tNonExported) M1() {} -func (tNonExported) M2() {} -func (t *tNonExported) M3() {} -func (t *tNonExported) M4() {} -func F() *tNonExported { - t := tNonExported(1) - return &t -} +func (a A) ma(b B) {} +func (b B) mb() {} +func (c C) mc(b B) {} From bfd037fb48b0d391c22a12f96cc0b451343ea0e3 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Fri, 29 Nov 2019 15:31:20 -0500 Subject: [PATCH 12/37] ider: id function uses type instead of position and add tests Signed-off-by: Colton McCurdy --- ider.go | 9 +++++---- ider_test.go | 32 ++++++++++++++++++++++++-------- 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/ider.go b/ider.go index 9fc90ff..1547e55 100644 --- a/ider.go +++ b/ider.go @@ -2,20 +2,21 @@ package splitfile import ( "fmt" - "go/token" "go/types" ) type Ider interface { Pkg() *types.Package Name() string - Pos() token.Pos + Type() types.Type } func Id(ider Ider) string { + id := fmt.Sprintf("%s %s", ider.Name(), ider.Type().String()) + if ider.Pkg() != nil { - return fmt.Sprintf("%s %s %d", ider.Pkg().String(), ider.Name(), ider.Pos()) + id = fmt.Sprintf("%s %s", ider.Pkg().String(), id) } - return fmt.Sprintf("%s %d", ider.Name(), ider.Pos()) + return id } diff --git a/ider_test.go b/ider_test.go index eeee959..61c51a9 100644 --- a/ider_test.go +++ b/ider_test.go @@ -1,20 +1,26 @@ package splitfile import ( - "go/token" "go/types" "testing" ) +type mockType struct { + s string +} + +func (mt mockType) Underlying() types.Type { return nil } +func (mt mockType) String() string { return mt.s } + type mockObject struct { pkgFn func() *types.Package nameFn func() string - posFn func() token.Pos + typeFn func() types.Type } func (mo mockObject) Pkg() *types.Package { return mo.pkgFn() } func (mo mockObject) Name() string { return mo.nameFn() } -func (mo mockObject) Pos() token.Pos { return mo.posFn() } +func (mo mockObject) Type() types.Type { return mo.typeFn() } func TestId(t *testing.T) { var tests = []struct { @@ -22,14 +28,24 @@ func TestId(t *testing.T) { want string pkgFn func() *types.Package nameFn func() string - posFn func() token.Pos + typeFn func() types.Type }{ { - name: "no package", - want: "name 1", + name: "no_package", + want: "name type", pkgFn: func() *types.Package { return nil }, nameFn: func() string { return "name" }, - posFn: func() token.Pos { return token.Pos(1) }, + typeFn: func() types.Type { return mockType{s: "type"} }, + }, + + { + name: "package_exists", + want: "package pkg (\"github.com/mccurdyc/path\") name type", + pkgFn: func() *types.Package { + return types.NewPackage("github.com/mccurdyc/path", "pkg") + }, + nameFn: func() string { return "name" }, + typeFn: func() types.Type { return mockType{s: "type"} }, }, } for _, tt := range tests { @@ -37,7 +53,7 @@ func TestId(t *testing.T) { o := mockObject{ pkgFn: tt.pkgFn, nameFn: tt.nameFn, - posFn: tt.posFn, + typeFn: tt.typeFn, } got := Id(o) From d0aaee7169ce20287f8b4d87b67c8f0915c5c8ee Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Sat, 30 Nov 2019 10:19:11 -0500 Subject: [PATCH 13/37] checkSignature should not return the receiver variable as a related node Signed-off-by: Colton McCurdy --- splitfile.go | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/splitfile.go b/splitfile.go index 5aae3fc..10ae334 100644 --- a/splitfile.go +++ b/splitfile.go @@ -50,18 +50,24 @@ func traverse(defs map[*ast.Ident]types.Object) graph.Graph { g := graph.New() for _, def := range defs { - if skip := filter(def); skip { continue } - node := graph.NewNode(Id(def), def) - err := g.AddNode(node) - if err != nil { - continue + // if the graph already contains a node with the same ID, no need to create + // another node with the same ID. This is because multiple variables in the + // same package could use the same name and have the same type. We don't want + // to skip entirely because this is a new instance of that variable and should + // be checked for new related nodes. + if !g.ContainsNode(Id(def)) { + node := graph.NewNode(Id(def), def) + err := g.AddNode(node) + if err != nil { + continue + } } - err = addRelated(g, node) + err := addRelated(g, g[Id(def)]) if err != nil { continue } @@ -140,9 +146,6 @@ func checkMethods(mset *types.MethodSet) []*graph.Node { func checkSignature(sig *types.Signature) []*graph.Node { rel := make([]*graph.Node, 0) - if v := checkVar(sig.Recv()); v != nil { - rel = append(rel, v) - } rel = append(rel, checkTuple(sig.Params())...) rel = append(rel, checkTuple(sig.Results())...) From e83341a9e4601700fec6f8cbc75292d074927951 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Sat, 30 Nov 2019 10:19:51 -0500 Subject: [PATCH 14/37] checkSignature should not return the receiver variable as a related node Signed-off-by: Colton McCurdy --- splitfile.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/splitfile.go b/splitfile.go index 10ae334..b4840a6 100644 --- a/splitfile.go +++ b/splitfile.go @@ -141,8 +141,7 @@ func checkMethods(mset *types.MethodSet) []*graph.Node { return rel } -// checkSignature checks a function signature, the receiver (if it is a method this -// will be a non-nil value), the parameters and the return types. +// checkSignature checks a function signature. Specifically, the parameters and the return types. func checkSignature(sig *types.Signature) []*graph.Node { rel := make([]*graph.Node, 0) From cb8463ef8b8252b76eada0382147f39258ce11da Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Tue, 3 Dec 2019 08:22:29 -0500 Subject: [PATCH 15/37] internal/graph: add methods for returning Roots and Edges of a graph Signed-off-by: Colton McCurdy --- internal/graph/graph.go | 19 +++++++++++++++++-- internal/graph/graph_test.go | 21 ++++++++++++--------- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/internal/graph/graph.go b/internal/graph/graph.go index 203a347..707bbf5 100644 --- a/internal/graph/graph.go +++ b/internal/graph/graph.go @@ -29,8 +29,10 @@ func (g Graph) ContainsNode(id string) bool { return ok } -// FindRoots finds the many possible roots in the graph. -func (g Graph) FindRoots() []*Node { +// Roots finds the many possible roots in the graph. +// The order of the graph traversal matters because if a parent is added _after_ +// a child, then that child node won't have the identified parent node. +func (g Graph) Roots() []*Node { roots := make([]*Node, 0, len(g)) for _, node := range g { @@ -41,3 +43,16 @@ func (g Graph) FindRoots() []*Node { return roots } + +// Edges returns all of the edges in the graph. +func (g Graph) Edges() []WeightedEdge { + edges := make([]WeightedEdge, 0) + + for _, node := range g { + for _, edge := range node.Edges { + edges = append(edges, edge) + } + } + + return edges +} diff --git a/internal/graph/graph_test.go b/internal/graph/graph_test.go index 7e42bb4..ac80c8c 100644 --- a/internal/graph/graph_test.go +++ b/internal/graph/graph_test.go @@ -31,22 +31,24 @@ func TestNew(t *testing.T) { } var ( + parentNode = Node{ID: "parent", Edges: make(map[string]WeightedEdge), Parents: make(map[string]WeightedEdge)} + parentEdge = map[string]WeightedEdge{ "parent": WeightedEdge{ Weight: 1.0, - Dest: &Node{ID: "parent", Edges: make(map[string]WeightedEdge)}, + Dest: &parentNode, }, } edgeB = map[string]WeightedEdge{ "b": WeightedEdge{ Weight: 1.0, - Dest: &Node{ID: "b", Edges: make(map[string]WeightedEdge)}, + Dest: &Node{ID: "b", Edges: make(map[string]WeightedEdge), Parents: make(map[string]WeightedEdge)}, }, } - nodeAWithSingleEdgeB = Node{ID: "a", Edges: edgeB} - nodeCWithSingleEdgeB = Node{ID: "c", Edges: edgeB} + nodeAWithSingleEdgeB = Node{ID: "a", Edges: edgeB, Parents: make(map[string]WeightedEdge)} + nodeCWithSingleEdgeB = Node{ID: "c", Edges: edgeB, Parents: make(map[string]WeightedEdge)} nodeCWithEdgeBAndParent = Node{ID: "c", Edges: edgeB, Parents: parentEdge} ) @@ -162,7 +164,7 @@ func TestContainsNode(t *testing.T) { } } -func TestFindRoots(t *testing.T) { +func TestRoots(t *testing.T) { tests := []struct { name string graph Graph @@ -177,18 +179,19 @@ func TestFindRoots(t *testing.T) { { name: "one-root-node", graph: Graph(map[string]*Node{ - "c": &nodeCWithEdgeBAndParent, + "parent": &parentNode, + "c": &nodeCWithEdgeBAndParent, }), - want: []*Node{&Node{ID: "parent", Edges: make(map[string]WeightedEdge)}}, + want: []*Node{&parentNode}, }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - got := test.graph.FindRoots() + got := test.graph.Roots() if diff := cmp.Diff(test.want, got); diff != "" { - t.Errorf("(%+v) FindRoots() mismatch (-want +got): \n%s", test.graph, diff) + t.Errorf("(%+v) Roots() mismatch (-want +got): \n%s", test.graph, diff) } }) } From 57eef6949ba77d4f885db35dcc8e3ce4431460f8 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Tue, 3 Dec 2019 08:23:42 -0500 Subject: [PATCH 16/37] internal/graph: add source and destination to edges Signed-off-by: Colton McCurdy --- internal/graph/node.go | 2 ++ internal/graph/node_test.go | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/internal/graph/node.go b/internal/graph/node.go index 282dcf1..1e33e23 100644 --- a/internal/graph/node.go +++ b/internal/graph/node.go @@ -53,11 +53,13 @@ func (n *Node) AddEdge(dest *Node, w float64) { n.Edges[dest.ID] = WeightedEdge{ Weight: w, + Source: n, Dest: dest, } dest.Parents[n.ID] = WeightedEdge{ Weight: w, + Source: dest, Dest: n, } } diff --git a/internal/graph/node_test.go b/internal/graph/node_test.go index 2bb8c77..f6914fb 100644 --- a/internal/graph/node_test.go +++ b/internal/graph/node_test.go @@ -68,7 +68,7 @@ func TestAddEdge(t *testing.T) { tt.node.AddEdge(tt.dest, tt.weight) if ok := reflect.DeepEqual(*tt.node, tt.want); !ok { - t.Errorf("(%+v) AddEdge(%+v, %f) - mismatch \n%+v", tt.node, tt.dest, tt.weight, tt.want) + t.Errorf("AddEdge() - mismatch \n\twant: %+v\n\tgot:%+v", tt.want, *tt.node) } }) } From b019b8761b885db4081206d4d9fae9dd2cdd8daf Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Tue, 3 Dec 2019 08:24:13 -0500 Subject: [PATCH 17/37] internal/graph: partition should use new Roots and Edges methods Signed-off-by: Colton McCurdy --- internal/graph/partition.go | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/internal/graph/partition.go b/internal/graph/partition.go index 8b6836e..61530a0 100644 --- a/internal/graph/partition.go +++ b/internal/graph/partition.go @@ -1,6 +1,8 @@ package graph -import "container/list" +import ( + "container/list" +) func shouldPartition(g Graph) bool { if len(g) <= 1 { @@ -23,14 +25,8 @@ func shouldPartition(g Graph) bool { // 4. calculate edge distances // 5. identify edges where the sum of cross-iteration distances > epsilon func Partition(g Graph, epsilon float64) []WeightedEdge { - roots := g.FindRoots() - - edges := make([]WeightedEdge, 0) - for _, node := range g { - for _, edge := range node.Edges { - edges = append(edges, edge) - } - } + roots := g.Roots() + edges := g.Edges() for _, root := range roots { visited := make(map[string]bool) From ac55b8d46217b0daba2b2b15d0504aa214384820 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Tue, 3 Dec 2019 08:24:46 -0500 Subject: [PATCH 18/37] analysis/test: uncomment test from filesystem Signed-off-by: Colton McCurdy --- splitfile_analysis_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/splitfile_analysis_test.go b/splitfile_analysis_test.go index b8c4079..f541b8d 100644 --- a/splitfile_analysis_test.go +++ b/splitfile_analysis_test.go @@ -17,10 +17,10 @@ import ( // rather deep directory hierarchy, and obscure similarities among // related tests, especially when tests involve multiple packages, or // multiple variants of a single scenario. -// func TestFromFileSystem(t *testing.T) { -// testdata := analysistest.TestData() -// analysistest.Run(t, testdata, splitfile.Analyzer, "abc") -// } +func TestFromFileSystem(t *testing.T) { + testdata := analysistest.TestData() + analysistest.Run(t, testdata, splitfile.Analyzer, "abc") +} // TestFromStringLiterals demonstrates how to test an analysis using // a table of string literals for each test case. From 776af2d42722a7a7daea7b56de7121cab3f04d49 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Tue, 3 Dec 2019 08:27:02 -0500 Subject: [PATCH 19/37] add gitignore to ignore testdata/pkg Signed-off-by: Colton McCurdy --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..dc3b3a5 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +testdata/pkg/* From 37876b2990bc486a3f28654212ded8bc27a02945 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Tue, 3 Dec 2019 08:28:01 -0500 Subject: [PATCH 20/37] internal/graph: add placeholder for partition tests Signed-off-by: Colton McCurdy --- internal/graph/partition_test.go | 1 + 1 file changed, 1 insertion(+) create mode 100644 internal/graph/partition_test.go diff --git a/internal/graph/partition_test.go b/internal/graph/partition_test.go new file mode 100644 index 0000000..e103ddd --- /dev/null +++ b/internal/graph/partition_test.go @@ -0,0 +1 @@ +package graph From f75ebe42d0e7898fdc77e1e10fc9968361b2ee8a Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Tue, 3 Dec 2019 08:28:38 -0500 Subject: [PATCH 21/37] splitfile tests Signed-off-by: Colton McCurdy --- splitfile_test.go | 116 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 splitfile_test.go diff --git a/splitfile_test.go b/splitfile_test.go new file mode 100644 index 0000000..f4a99b4 --- /dev/null +++ b/splitfile_test.go @@ -0,0 +1,116 @@ +package splitfile + +import ( + "go/ast" + "go/importer" + "go/parser" + "go/token" + "go/types" + "log" + "reflect" + "testing" + + "github.com/mccurdyc/splitfile/internal/graph" +) + +var ( + emptyNode = graph.Node{ + ID: "node", + Object: nil, + Edges: make(map[string]graph.WeightedEdge), + Parents: make(map[string]graph.WeightedEdge), + MinPathStrength: 0.0, + ShortestPathLen: 0.0, + } +) + +func Test_addRelated(t *testing.T) { + var tests = []struct { + name string + graph graph.Graph + pkgpath string + files map[string]string + node *graph.Node + typeNameToCheck string + want graph.Graph + wantErr error + }{ + { + name: "type with no methods", + graph: make(graph.Graph), + pkgpath: "a", + files: map[string]string{"a/a.go": ` + package a + type a int + `}, + node: &emptyNode, + typeNameToCheck: "a", + want: make(graph.Graph), + wantErr: nil, + }, + + { + name: "type with a single method", + graph: make(graph.Graph), + pkgpath: "a", + files: map[string]string{"a/a.go": ` + package a + type a int + func (a a) Val() int { + return int(a) + } + `}, + node: &emptyNode, + typeNameToCheck: "a", + want: graph.Graph{ + "package a (\"a\") 49": &graph.Node{}, + "package a (\"a\") Val 43": &graph.Node{}, + "package a (\"a\") a 38": &graph.Node{}, + }, + wantErr: nil, + }, + } + + for _, tt := range tests { + tt := tt + t.Run(tt.name, func(t *testing.T) { + fset := token.NewFileSet() + var files []*ast.File + + for fname, fileContents := range tt.files { + f, err := parser.ParseFile(fset, fname, fileContents, 0) + if err != nil { + log.Fatal(err) + } + + files = append(files, f) + } + + conf := types.Config{Importer: importer.Default()} + pkg, err := conf.Check(tt.pkgpath, fset, files, nil) + if err != nil { + log.Fatal(err) + } + + tt.node.Object = pkg.Scope().Lookup(tt.typeNameToCheck) + + gotErr := addRelated(tt.graph, tt.node) + + if !reflect.DeepEqual(tt.want, tt.graph) { + t.Errorf("addRelated() mismatch:\n\twant: %+v\n\tgot: %+v", tt.want, tt.graph) + } + + // https://github.com/google/go-cmp/issues/24 + errorCmp := func(x, y error) bool { + if x == nil || y == nil { + return x == nil && y == nil + } + return x.Error() == y.Error() + } + + if ok := errorCmp(gotErr, tt.wantErr); !ok { + t.Errorf("addRelated() = %v, wantErr %v", gotErr, tt.wantErr) + } + }) + } +} From b5a786a85bcab485e293ccc6ef7834456865f3a4 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Tue, 3 Dec 2019 08:29:03 -0500 Subject: [PATCH 22/37] WIP; working on graph traversal. not working properly Signed-off-by: Colton McCurdy --- splitfile.go | 98 ++++++++++++++++++++++++++++++++++------------------ 1 file changed, 65 insertions(+), 33 deletions(-) diff --git a/splitfile.go b/splitfile.go index b4840a6..78502f0 100644 --- a/splitfile.go +++ b/splitfile.go @@ -54,11 +54,6 @@ func traverse(defs map[*ast.Ident]types.Object) graph.Graph { continue } - // if the graph already contains a node with the same ID, no need to create - // another node with the same ID. This is because multiple variables in the - // same package could use the same name and have the same type. We don't want - // to skip entirely because this is a new instance of that variable and should - // be checked for new related nodes. if !g.ContainsNode(Id(def)) { node := graph.NewNode(Id(def), def) err := g.AddNode(node) @@ -82,7 +77,12 @@ func filter(def types.Object) bool { return true } - return false + switch def.(type) { + case *types.Func, *types.TypeName: + return false + } + + return true } type Typer interface { @@ -99,19 +99,30 @@ func addRelated(g graph.Graph, node *graph.Node) error { if !ok { return errors.New("node does not have a Type() method") } + m := checkMethods(types.NewMethodSet(t.Type())) - for _, r := range m { - if r.ID == node.ID { - continue + // it is important to add receivers first + for _, r := range m.receivers { + if !g.ContainsNode(r.ID) { + err := g.AddNode(r) + if err != nil { + continue + } } - err := g.AddNode(r) - if err != nil { - continue + node.AddEdge(r, 5.0) // TODO (Issue #15): read value from config or use default + } + + for _, r := range m.other { + if !g.ContainsNode(r.ID) { + err := g.AddNode(r) + if err != nil { + continue + } } - node.AddEdge(r, 5.0) // TODO (Issue #15): read value from config or use default + node.AddEdge(r, 2.0) // TODO (Issue #15): read value from config or use default } // TODO: check other places for related (e.g., funcs, interfaces, etc.) @@ -119,50 +130,71 @@ func addRelated(g graph.Graph, node *graph.Node) error { return nil } +type methodSetResult struct { + receivers []*graph.Node + other []*graph.Node +} + // checkMethods checks methods' signatures for related types. -func checkMethods(mset *types.MethodSet) []*graph.Node { - rel := make([]*graph.Node, 0) +func checkMethods(g graph.Graph, mset *types.MethodSet) methodSetResult { + res := methodSetResult{ + receivers: make([]*graph.Node, 0), + other: make([]*graph.Node, 0), + } for i := 0; i < mset.Len(); i++ { method := mset.At(i) - m := graph.NewNode(Id(method.Obj()), method.Obj()) - rel = append(rel, m) // methods themselves are always related - sig, ok := method.Type().(*types.Signature) if !ok { - continue + continue // skip evaluating method bodies } - related := checkSignature(sig) - rel = append(rel, related...) + m := graph.NewNode(Id(method.Obj()), method.Obj()) + res.other = append(res.other, m) // methods themselves are always related + + sigRes := checkSignature(sig) + res.receivers = append(res.receivers, sigRes.receivers...) + res.other = append(res.other, sigRes.other...) } - return rel + return res } // checkSignature checks a function signature. Specifically, the parameters and the return types. -func checkSignature(sig *types.Signature) []*graph.Node { - rel := make([]*graph.Node, 0) +func checkSignature(sig *types.Signature) methodSetResult { + res := methodSetResult{ + receivers: make([]*graph.Node, 0), + other: make([]*graph.Node, 0), + } - rel = append(rel, checkTuple(sig.Params())...) - rel = append(rel, checkTuple(sig.Results())...) + if v := checkVar(sig.Recv()); v != nil { + res.receivers = append(res.receivers, v) + } + res.other = append(res.other, checkTuple(sig.Params())...) + res.other = append(res.other, checkTuple(sig.Results())...) - return rel + return res } -// checkVar validates a variable and if it is valid, it is returned as a valid related. +// checkVar validates a variable and if it is valid, it is returned as a valid node. func checkVar(v *types.Var) *graph.Node { if v == nil || v.Type() == types.Type(nil) { return nil } - return graph.NewNode(Id(v), v) + typ, ok := v.Type().(*types.Named) + if !ok { + return nil + } + obj := typ.Obj() + + return graph.NewNode(Id(obj), obj) } -// checkTuple checks a tuple of variables for related nodes. +// checkTuple checks a tuple of variables for nodes. func checkTuple(vars *types.Tuple) []*graph.Node { - rel := make([]*graph.Node, 0) + res := make([]*graph.Node, 0) for i := 0; i < vars.Len(); i++ { v := checkVar(vars.At(i)) @@ -171,8 +203,8 @@ func checkTuple(vars *types.Tuple) []*graph.Node { continue } - rel = append(rel, v) + res = append(res, v) } - return rel + return res } From 100788358b6c747d211d28d872c051af4bfb6708 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Wed, 4 Dec 2019 08:00:05 -0500 Subject: [PATCH 23/37] WIP; fixed method related. there is still an issue where some edges aren't added Signed-off-by: Colton McCurdy --- splitfile.go | 54 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/splitfile.go b/splitfile.go index 78502f0..62ab652 100644 --- a/splitfile.go +++ b/splitfile.go @@ -2,6 +2,7 @@ package splitfile import ( "errors" + "fmt" "go/ast" "go/token" "go/types" @@ -103,26 +104,28 @@ func addRelated(g graph.Graph, node *graph.Node) error { m := checkMethods(types.NewMethodSet(t.Type())) // it is important to add receivers first - for _, r := range m.receivers { - if !g.ContainsNode(r.ID) { - err := g.AddNode(r) + for _, v := range m { + if !g.ContainsNode(v.method.ID) { + err := g.AddNode(v.method) if err != nil { + fmt.Println("error adding method") continue } } - node.AddEdge(r, 5.0) // TODO (Issue #15): read value from config or use default - } + node.AddEdge(g[v.method.ID], 5.0) // TODO (Issue #15): read value from config or use default - for _, r := range m.other { - if !g.ContainsNode(r.ID) { - err := g.AddNode(r) - if err != nil { - continue + for _, r := range v.other { + if !g.ContainsNode(r.ID) { + err := g.AddNode(r) + if err != nil { + fmt.Println("error adding other") + continue + } } - } - node.AddEdge(r, 2.0) // TODO (Issue #15): read value from config or use default + v.method.AddEdge(r, 2.0) // params, results, etc should be on the method node, not the receiver node + } } // TODO: check other places for related (e.g., funcs, interfaces, etc.) @@ -130,17 +133,16 @@ func addRelated(g graph.Graph, node *graph.Node) error { return nil } -type methodSetResult struct { +type methodSetResult map[string]methodResult +type methodResult struct { + method *graph.Node receivers []*graph.Node other []*graph.Node } // checkMethods checks methods' signatures for related types. -func checkMethods(g graph.Graph, mset *types.MethodSet) methodSetResult { - res := methodSetResult{ - receivers: make([]*graph.Node, 0), - other: make([]*graph.Node, 0), - } +func checkMethods(mset *types.MethodSet) methodSetResult { + res := make(methodSetResult) for i := 0; i < mset.Len(); i++ { method := mset.At(i) @@ -151,19 +153,25 @@ func checkMethods(g graph.Graph, mset *types.MethodSet) methodSetResult { } m := graph.NewNode(Id(method.Obj()), method.Obj()) - res.other = append(res.other, m) // methods themselves are always related + v := methodResult{ + method: m, + receivers: make([]*graph.Node, 0), + other: make([]*graph.Node, 0), + } sigRes := checkSignature(sig) - res.receivers = append(res.receivers, sigRes.receivers...) - res.other = append(res.other, sigRes.other...) + v.receivers = append(v.receivers, sigRes.receivers...) + v.other = append(v.other, sigRes.other...) + + res[m.ID] = v } return res } // checkSignature checks a function signature. Specifically, the parameters and the return types. -func checkSignature(sig *types.Signature) methodSetResult { - res := methodSetResult{ +func checkSignature(sig *types.Signature) methodResult { + res := methodResult{ receivers: make([]*graph.Node, 0), other: make([]*graph.Node, 0), } From 1b51055f5fd3a86de67cda42bd8a75229a06db2e Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Thu, 5 Dec 2019 07:58:40 -0500 Subject: [PATCH 24/37] fix race condition between method node being added first and method being evaluated. this race condition lead to missing edges if method nodes were added first Signed-off-by: Colton McCurdy --- splitfile.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/splitfile.go b/splitfile.go index 62ab652..56916d2 100644 --- a/splitfile.go +++ b/splitfile.go @@ -2,7 +2,6 @@ package splitfile import ( "errors" - "fmt" "go/ast" "go/token" "go/types" @@ -108,7 +107,6 @@ func addRelated(g graph.Graph, node *graph.Node) error { if !g.ContainsNode(v.method.ID) { err := g.AddNode(v.method) if err != nil { - fmt.Println("error adding method") continue } } @@ -119,12 +117,11 @@ func addRelated(g graph.Graph, node *graph.Node) error { if !g.ContainsNode(r.ID) { err := g.AddNode(r) if err != nil { - fmt.Println("error adding other") continue } } - v.method.AddEdge(r, 2.0) // params, results, etc should be on the method node, not the receiver node + g[v.method.ID].AddEdge(r, 2.0) // params, results, etc should be on the method node, not the receiver node } } From d9dc68c32a7e3d9a2c084905dc0cc1bbc8e6e3ba Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Fri, 6 Dec 2019 07:29:35 -0500 Subject: [PATCH 25/37] recursiveBFS: needed to remove the element from the queue in order to terminate recursion! It completes now Signed-off-by: Colton McCurdy --- internal/graph/partition.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/internal/graph/partition.go b/internal/graph/partition.go index 61530a0..c33f7e4 100644 --- a/internal/graph/partition.go +++ b/internal/graph/partition.go @@ -29,6 +29,10 @@ func Partition(g Graph, epsilon float64) []WeightedEdge { edges := g.Edges() for _, root := range roots { + if len(root.Edges) == 0 { + continue + } + visited := make(map[string]bool) recursiveBFS(root, visited) @@ -133,7 +137,7 @@ func recursiveBFS(node *Node, visited map[string]bool) { for queue.Len() > 0 { n := queue.Front() - e, ok := n.Value.(*Node) + e, ok := queue.Remove(n).(*Node) if !ok { continue } From 6d19fecf92264040f7628eac4a821447e93557ae Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Mon, 9 Dec 2019 07:24:25 -0500 Subject: [PATCH 26/37] add debug logging to give additional information about the identified partitions Signed-off-by: Colton McCurdy --- splitfile.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/splitfile.go b/splitfile.go index 56916d2..15ddb94 100644 --- a/splitfile.go +++ b/splitfile.go @@ -5,6 +5,7 @@ import ( "go/ast" "go/token" "go/types" + "log" "golang.org/x/tools/go/analysis" "golang.org/x/tools/go/analysis/passes/inspect" @@ -33,12 +34,12 @@ func run(pass *analysis.Pass) (interface{}, error) { continue } - dest, ok := e.Dest.Object.(Poser) - if !ok { - continue - } + log.Printf("\n\tsrc: %+v \n\tdest: %+v\n", e.Source, e.Dest) - pass.Reportf(src.Pos(), "parition found between -> %+v", dest.Pos()) + pass.Report(analysis.Diagnostic{ + Pos: src.Pos(), + Message: "potential partition identified", + }) } return nil, nil From d32bf9c4c340e895687e440b89e1962e3d5fb494 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Tue, 10 Dec 2019 08:52:45 -0500 Subject: [PATCH 27/37] set and use defaultWeight of -1 Signed-off-by: Colton McCurdy --- internal/graph/node.go | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/internal/graph/node.go b/internal/graph/node.go index 1e33e23..3d3f59d 100644 --- a/internal/graph/node.go +++ b/internal/graph/node.go @@ -4,9 +4,7 @@ import ( "errors" ) -const ( - defaultConnectednessValue = -1 // default to an invalid, "infinity", value. -) +const defaultWeight = -1 // WeightedEdge contains pointers to source and destination Nodes as well as // the assigned weight of the relationship. @@ -36,10 +34,12 @@ type Node struct { // NewNode creates a pointer to a new Node with ID, id, and initializes a map of Edges. func NewNode(id string, v interface{}) *Node { return &Node{ - ID: id, - Object: v, - Edges: make(map[string]WeightedEdge), - Parents: make(map[string]WeightedEdge), + ID: id, + Object: v, + Edges: make(map[string]WeightedEdge), + Parents: make(map[string]WeightedEdge), + MinPathStrength: defaultWeight, + ShortestPathLen: defaultWeight, } } From 786e562deb8ea37579896fb681aeb6201bdc6176 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Tue, 10 Dec 2019 08:54:27 -0500 Subject: [PATCH 28/37] WIP; check for default values to avoid setting weights to defaults (lowest). add a log message and avoid division by 0 Signed-off-by: Colton McCurdy --- internal/graph/partition.go | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/internal/graph/partition.go b/internal/graph/partition.go index c33f7e4..c5fb036 100644 --- a/internal/graph/partition.go +++ b/internal/graph/partition.go @@ -2,6 +2,7 @@ package graph import ( "container/list" + "log" ) func shouldPartition(g Graph) bool { @@ -42,7 +43,9 @@ func Partition(g Graph, epsilon float64) []WeightedEdge { if edge.Source == root { continue } - edge.MinPathStrengths = append(edge.MinPathStrengths, edge.Source.MinPathStrength) + + mps := g[edge.Source.ID].Edges[edge.Dest.ID].MinPathStrengths + mps = append(mps, edge.Source.MinPathStrength) } } @@ -78,6 +81,10 @@ func calculateDistances(g Graph, edges []WeightedEdge) []WeightedEdge { sum += mps } + if sum == 0.0 { + continue + } + edge.Distance = 1 / (sum) res = append(res, edge) } @@ -92,6 +99,7 @@ func identifyPartitions(edges []WeightedEdge, epsilon float64) []WeightedEdge { for _, e := range edges { if e.Distance > epsilon { + log.Printf("identified partition with distance: %+v\n\tsrc: %+v \n\tdest: %+v\n", e.Distance, e.Source, e.Dest) res = append(res, e) } } @@ -114,8 +122,10 @@ func recursiveBFS(node *Node, visited map[string]bool) { updateChildren = true } + // Did the parent node see an edge with a lower weight than the edge weight + // between the child and parent node? If so, use the parent's min. if node.MinPathStrength < edge.Dest.MinPathStrength { - edge.Dest.MinPathStrength = node.ShortestPathLen + edge.Dest.MinPathStrength = node.MinPathStrength updateChildren = true } @@ -136,15 +146,24 @@ func recursiveBFS(node *Node, visited map[string]bool) { } for queue.Len() > 0 { - n := queue.Front() - e, ok := queue.Remove(n).(*Node) + d, ok := queue.Remove(queue.Front()).(*Node) if !ok { continue } - e.ShortestPathLen = node.ShortestPathLen + node.Edges[e.ID].Weight - e.MinPathStrength = node.Edges[e.ID].Weight + d.ShortestPathLen = node.Edges[d.ID].Weight + d.MinPathStrength = node.Edges[d.ID].Weight + + if node.ShortestPathLen != defaultWeight { + d.ShortestPathLen += node.ShortestPathLen + } + + if node.MinPathStrength != defaultWeight { + if node.MinPathStrength < d.MinPathStrength { + d.MinPathStrength = node.MinPathStrength + } + } - recursiveBFS(e, visited) + recursiveBFS(d, visited) } } From f6aae209eead8543633b7d4c5b12f7b6884ff83d Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Tue, 10 Dec 2019 08:54:59 -0500 Subject: [PATCH 29/37] WIP; fix weights. lower inidicates closer Signed-off-by: Colton McCurdy --- splitfile.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/splitfile.go b/splitfile.go index 15ddb94..75e8e78 100644 --- a/splitfile.go +++ b/splitfile.go @@ -5,7 +5,6 @@ import ( "go/ast" "go/token" "go/types" - "log" "golang.org/x/tools/go/analysis" "golang.org/x/tools/go/analysis/passes/inspect" @@ -34,8 +33,6 @@ func run(pass *analysis.Pass) (interface{}, error) { continue } - log.Printf("\n\tsrc: %+v \n\tdest: %+v\n", e.Source, e.Dest) - pass.Report(analysis.Diagnostic{ Pos: src.Pos(), Message: "potential partition identified", @@ -112,7 +109,8 @@ func addRelated(g graph.Graph, node *graph.Node) error { } } - node.AddEdge(g[v.method.ID], 5.0) // TODO (Issue #15): read value from config or use default + // Lower weight is better. It indicates "closeness" + node.AddEdge(g[v.method.ID], 2.0) // TODO (Issue #15): read value from config or use default for _, r := range v.other { if !g.ContainsNode(r.ID) { @@ -122,7 +120,7 @@ func addRelated(g graph.Graph, node *graph.Node) error { } } - g[v.method.ID].AddEdge(r, 2.0) // params, results, etc should be on the method node, not the receiver node + g[v.method.ID].AddEdge(r, 5.0) // params, results, etc should be on the method node, not the receiver node } } From 0333162375707bd5cfc2607cf9eae01a412b616c Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Wed, 11 Dec 2019 07:05:35 -0500 Subject: [PATCH 30/37] dont add default values to minpathweights Signed-off-by: Colton McCurdy --- internal/graph/partition.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/graph/partition.go b/internal/graph/partition.go index c5fb036..cf90242 100644 --- a/internal/graph/partition.go +++ b/internal/graph/partition.go @@ -40,7 +40,7 @@ func Partition(g Graph, epsilon float64) []WeightedEdge { for _, edge := range edges { // skip if the node is the root because we don't want to add '0' to the slice of // seen min paths. This could lead to division by zero. - if edge.Source == root { + if edge.Source == root || edge.Source.MinPathStrength == defaultWeight { continue } From 448de64d882e7db1c7a0b584184ac29a776eed85 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Wed, 11 Dec 2019 09:05:11 -0500 Subject: [PATCH 31/37] WIP; refactor recursiveBFS; builds, but havent verified correctness Signed-off-by: Colton McCurdy --- internal/graph/partition.go | 107 ++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 52 deletions(-) diff --git a/internal/graph/partition.go b/internal/graph/partition.go index cf90242..d721787 100644 --- a/internal/graph/partition.go +++ b/internal/graph/partition.go @@ -26,27 +26,22 @@ func shouldPartition(g Graph) bool { // 4. calculate edge distances // 5. identify edges where the sum of cross-iteration distances > epsilon func Partition(g Graph, epsilon float64) []WeightedEdge { - roots := g.Roots() - edges := g.Edges() + roots := make(map[string]*Node) + for _, r := range g.Roots() { + roots[r.ID] = r + } + + visited := make(map[string]bool) + recursiveBFS(roots, visited) - for _, root := range roots { - if len(root.Edges) == 0 { + edges := g.Edges() + for _, edge := range edges { + if edge.Source.MinPathStrength == defaultWeight { continue } - visited := make(map[string]bool) - recursiveBFS(root, visited) - - for _, edge := range edges { - // skip if the node is the root because we don't want to add '0' to the slice of - // seen min paths. This could lead to division by zero. - if edge.Source == root || edge.Source.MinPathStrength == defaultWeight { - continue - } - - mps := g[edge.Source.ID].Edges[edge.Dest.ID].MinPathStrengths - mps = append(mps, edge.Source.MinPathStrength) - } + mps := g[edge.Source.ID].Edges[edge.Dest.ID].MinPathStrengths + mps = append(mps, edge.Source.MinPathStrength) } dists := calculateDistances(g, edges) @@ -109,61 +104,69 @@ func identifyPartitions(edges []WeightedEdge, epsilon float64) []WeightedEdge { // recursiveBFS does recursive breadth-first search of the graph, keeping track // of shortest path and the mininimum path strength (i.e., the weaked edge in a path). -func recursiveBFS(node *Node, visited map[string]bool) { - queue := list.New() - visited[node.ID] = true +func recursiveBFS(level map[string]*Node, visited map[string]bool) { + evalQueue := list.New() + nextLevel := make(map[string]*Node) - for _, edge := range node.Edges { - if _, ok := visited[edge.Dest.ID]; ok { - var updateChildren bool + for _, node := range level { + if _, ok := visited[node.ID]; !ok { + evalQueue.PushBack(node) + continue + } - if edge.Weight < edge.Dest.MinPathStrength { - edge.Dest.MinPathStrength = edge.Weight - updateChildren = true + var reevaluate bool + for _, parentEdge := range node.Parents { + if parentEdge.Weight < node.MinPathStrength { + node.MinPathStrength = parentEdge.Weight + reevaluate = true } - // Did the parent node see an edge with a lower weight than the edge weight - // between the child and parent node? If so, use the parent's min. - if node.MinPathStrength < edge.Dest.MinPathStrength { - edge.Dest.MinPathStrength = node.MinPathStrength - updateChildren = true - } + parent := parentEdge.Source - pLen := node.ShortestPathLen + edge.Weight - if pLen < edge.Dest.ShortestPathLen { - edge.Dest.ShortestPathLen = pLen - updateChildren = true + if parent.MinPathStrength < node.MinPathStrength { + node.MinPathStrength = parent.MinPathStrength + reevaluate = true } - if updateChildren { - queue.PushBack(edge.Dest) + pathLen := parent.ShortestPathLen + parentEdge.Weight + if pathLen < node.ShortestPathLen { + node.ShortestPathLen = pathLen + reevaluate = true } - continue + if reevaluate { + evalQueue.PushBack(node) + } } - - queue.PushBack(edge.Dest) } - for queue.Len() > 0 { - d, ok := queue.Remove(queue.Front()).(*Node) + for evalQueue.Len() > 0 { + n, ok := evalQueue.Remove(evalQueue.Front()).(*Node) if !ok { continue } - d.ShortestPathLen = node.Edges[d.ID].Weight - d.MinPathStrength = node.Edges[d.ID].Weight - - if node.ShortestPathLen != defaultWeight { - d.ShortestPathLen += node.ShortestPathLen + for _, childEdge := range n.Edges { + nextLevel[childEdge.Dest.ID] = childEdge.Dest } - if node.MinPathStrength != defaultWeight { - if node.MinPathStrength < d.MinPathStrength { - d.MinPathStrength = node.MinPathStrength + for _, parentEdge := range n.Parents { + if n.MinPathStrength == defaultWeight || parentEdge.Weight < n.MinPathStrength { + n.MinPathStrength = parentEdge.Weight + } + + parent := parentEdge.Source + if parent.MinPathStrength < n.MinPathStrength { + n.MinPathStrength = parent.MinPathStrength + } + + pathLen := parent.ShortestPathLen + parentEdge.Weight + if n.ShortestPathLen == defaultWeight || pathLen < n.ShortestPathLen { + n.ShortestPathLen = pathLen } } - recursiveBFS(d, visited) + visited[n.ID] = true + recursiveBFS(nextLevel, visited) } } From 80418b2d273fbb705e71d1a98d96d2e31c6074a6 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Thu, 12 Dec 2019 08:07:23 -0500 Subject: [PATCH 32/37] fix reference bug where edges could be referencing different values for the same node Signed-off-by: Colton McCurdy --- splitfile.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/splitfile.go b/splitfile.go index 75e8e78..e40eee1 100644 --- a/splitfile.go +++ b/splitfile.go @@ -120,7 +120,7 @@ func addRelated(g graph.Graph, node *graph.Node) error { } } - g[v.method.ID].AddEdge(r, 5.0) // params, results, etc should be on the method node, not the receiver node + g[v.method.ID].AddEdge(g[r.ID], 5.0) // params, results, etc should be on the method node, not the receiver node } } From 4f34901cee05e5c2ea57143a0f5f08450e7ae9d6 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Mon, 16 Dec 2019 07:32:40 -0500 Subject: [PATCH 33/37] internal/graph: simplify recursiveBFS function Calculate shortest paths of children on the way "down" the graph. Signed-off-by: Colton McCurdy --- internal/graph/partition.go | 126 +++++++++++-------------------- internal/graph/partition_test.go | 57 ++++++++++++++ 2 files changed, 101 insertions(+), 82 deletions(-) diff --git a/internal/graph/partition.go b/internal/graph/partition.go index d721787..cecdbe3 100644 --- a/internal/graph/partition.go +++ b/internal/graph/partition.go @@ -19,10 +19,9 @@ func shouldPartition(g Graph) bool { // Partition returns a slice of WeightedEdges that should be partitioned (i.e., "broken"). // The pseudo-algorithm for Partition is as follows: // 1. find root nodes -// 2. find all edges in the graph -// 3. for each root -// a. recursively traverse the graph to identify the min shortest path to get to a node -// b. for each edge in the graph, store the min shortest path to a node from the root under observation +// 2. recursively traverse the graph +// a. keeping track of the current shortest path to a node +// b. keeping track of all observed shortest paths to a node // 4. calculate edge distances // 5. identify edges where the sum of cross-iteration distances > epsilon func Partition(g Graph, epsilon float64) []WeightedEdge { @@ -34,18 +33,50 @@ func Partition(g Graph, epsilon float64) []WeightedEdge { visited := make(map[string]bool) recursiveBFS(roots, visited) - edges := g.Edges() - for _, edge := range edges { - if edge.Source.MinPathStrength == defaultWeight { + dists := calculateDistances(g, g.Edges()) + return identifyPartitions(dists, epsilon) +} + +// recursiveBFS does recursive breadth-first search of the graph, keeping track +// of shortest path and the mininimum path strength (i.e., the weakest edge in a path). +func recursiveBFS(level map[string]*Node, visited map[string]bool) { + evalQueue := list.New() + nextLevel := make(map[string]*Node) + + for _, node := range level { + if _, ok := visited[node.ID]; !ok { + evalQueue.PushBack(node) + } + } + + for evalQueue.Len() > 0 { + n, ok := evalQueue.Remove(evalQueue.Front()).(*Node) + if !ok { continue } - mps := g[edge.Source.ID].Edges[edge.Dest.ID].MinPathStrengths - mps = append(mps, edge.Source.MinPathStrength) + for _, childEdge := range n.Edges { + nextLevel[childEdge.Dest.ID] = childEdge.Dest + + p := shortestPath(childEdge) + if p < childEdge.Dest.ShortestPath { + childEdge.Dest.ShortestPath = p + } + + childEdge.Source.ShortestPaths = append(childEdge.Dest.ShortestPaths, p) + } + + visited[n.ID] = true + recursiveBFS(nextLevel, visited) } +} - dists := calculateDistances(g, edges) - return identifyPartitions(dists, epsilon) +func shortestPath(edge WeightedEdge) float64 { + if edge.Source == nil || edge.Source.ShortestPath == defaultWeight { + return edge.Weight + } + + return edge.Source.ShortestPath + edge.Weight } // calculateDistances calculates the strongest strong distance of an edge from @@ -72,8 +103,8 @@ func calculateDistances(g Graph, edges []WeightedEdge) []WeightedEdge { for _, edge := range edges { var sum float64 - for _, mps := range edge.MinPathStrengths { - sum += mps + for _, sp := range edge.Dest.ShortestPaths { + sum += sp } if sum == 0.0 { @@ -101,72 +132,3 @@ func identifyPartitions(edges []WeightedEdge, epsilon float64) []WeightedEdge { return res } - -// recursiveBFS does recursive breadth-first search of the graph, keeping track -// of shortest path and the mininimum path strength (i.e., the weaked edge in a path). -func recursiveBFS(level map[string]*Node, visited map[string]bool) { - evalQueue := list.New() - nextLevel := make(map[string]*Node) - - for _, node := range level { - if _, ok := visited[node.ID]; !ok { - evalQueue.PushBack(node) - continue - } - - var reevaluate bool - for _, parentEdge := range node.Parents { - if parentEdge.Weight < node.MinPathStrength { - node.MinPathStrength = parentEdge.Weight - reevaluate = true - } - - parent := parentEdge.Source - - if parent.MinPathStrength < node.MinPathStrength { - node.MinPathStrength = parent.MinPathStrength - reevaluate = true - } - - pathLen := parent.ShortestPathLen + parentEdge.Weight - if pathLen < node.ShortestPathLen { - node.ShortestPathLen = pathLen - reevaluate = true - } - - if reevaluate { - evalQueue.PushBack(node) - } - } - } - - for evalQueue.Len() > 0 { - n, ok := evalQueue.Remove(evalQueue.Front()).(*Node) - if !ok { - continue - } - - for _, childEdge := range n.Edges { - nextLevel[childEdge.Dest.ID] = childEdge.Dest - } - - for _, parentEdge := range n.Parents { - if n.MinPathStrength == defaultWeight || parentEdge.Weight < n.MinPathStrength { - n.MinPathStrength = parentEdge.Weight - } - - parent := parentEdge.Source - if parent.MinPathStrength < n.MinPathStrength { - n.MinPathStrength = parent.MinPathStrength - } - - pathLen := parent.ShortestPathLen + parentEdge.Weight - if n.ShortestPathLen == defaultWeight || pathLen < n.ShortestPathLen { - n.ShortestPathLen = pathLen - } - } - - visited[n.ID] = true - recursiveBFS(nextLevel, visited) - } -} diff --git a/internal/graph/partition_test.go b/internal/graph/partition_test.go index e103ddd..c114e39 100644 --- a/internal/graph/partition_test.go +++ b/internal/graph/partition_test.go @@ -1 +1,58 @@ package graph + +import "testing" + +func Test_shortestPath(t *testing.T) { + type input struct { + edge WeightedEdge + } + + tests := map[string]struct { + input input + want float64 + }{ + "nil_source_node_with_edge_weight_2.0": { + input: input{ + edge: WeightedEdge{ + Weight: 2.0, + Source: nil, + }, + }, + want: 2.0, + }, + + "source_node_with_default_weight_edge_weight_2.0": { + input: input{ + edge: WeightedEdge{ + Weight: 2.0, + Source: &Node{ + ShortestPath: defaultWeight, + }, + }, + }, + want: 2.0, + }, + + "source_node_with_shortest_path_of_2.0_edge_weight_2.0": { + input: input{ + edge: WeightedEdge{ + Weight: 2.0, + Source: &Node{ + ShortestPath: 2.0, + }, + }, + }, + want: 4.0, + }, + } + + for name, tt := range tests { + t.Run(name, func(t *testing.T) { + got := shortestPath(tt.input.edge) + + if got != tt.want { + t.Errorf("shortestPath() = %+v - want: %+v\n\tinput: %+v\n", got, tt.want, tt.input) + } + }) + } +} From b49641dcaa1a8eabc3842f3e9073cafbec8177c1 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Tue, 17 Dec 2019 07:15:20 -0500 Subject: [PATCH 34/37] internal/graph/partition: simplified bfs function * removed queueing of nodes in bfs * returned a value `changed` from shortestPath function to indicate whether the value returned is a new shortest or not. * added tests for bfs and updated tests for shortestPath Signed-off-by: Colton McCurdy --- internal/graph/partition.go | 58 ++++++----- internal/graph/partition_test.go | 161 +++++++++++++++++++++++++++---- 2 files changed, 171 insertions(+), 48 deletions(-) diff --git a/internal/graph/partition.go b/internal/graph/partition.go index cecdbe3..ad67e6b 100644 --- a/internal/graph/partition.go +++ b/internal/graph/partition.go @@ -1,7 +1,6 @@ package graph import ( - "container/list" "log" ) @@ -30,53 +29,50 @@ func Partition(g Graph, epsilon float64) []WeightedEdge { roots[r.ID] = r } - visited := make(map[string]bool) - recursiveBFS(roots, visited) + var curr, next map[string]*Node + curr = roots + + for len(next) > 0 { + next = bfs(curr) + curr = next + } dists := calculateDistances(g, g.Edges()) return identifyPartitions(dists, epsilon) } -// recursiveBFS does recursive breadth-first search of the graph, keeping track +// bfs does breadth-first search of the graph, keeping track // of shortest path and the mininimum path strength (i.e., the weakest edge in a path). -func recursiveBFS(level map[string]*Node, visited map[string]bool) { - evalQueue := list.New() - nextLevel := make(map[string]*Node) +func bfs(level map[string]*Node) map[string]*Node { + next := make(map[string]*Node) for _, node := range level { - if _, ok := visited[node.ID]; !ok { - evalQueue.PushBack(node) - } - } - - for evalQueue.Len() > 0 { - n, ok := evalQueue.Remove(evalQueue.Front()).(*Node) - if !ok { - continue - } + for _, childEdge := range node.Edges { + p, changed := shortestPath(childEdge) - for _, childEdge := range n.Edges { - nextLevel[childEdge.Dest.ID] = childEdge.Dest - - p := shortestPath(childEdge) - if p < childEdge.Dest.ShortestPath { + if changed { childEdge.Dest.ShortestPath = p + childEdge.Dest.ShortestPaths = append(childEdge.Dest.ShortestPaths, p) + next[childEdge.Dest.ID] = childEdge.Dest } - - childEdge.Source.ShortestPaths = append(childEdge.Dest.ShortestPaths, p) } - - visited[n.ID] = true - recursiveBFS(nextLevel, visited) } + + return next } -func shortestPath(edge WeightedEdge) float64 { - if edge.Source == nil || edge.Source.ShortestPath == defaultWeight { - return edge.Weight +func shortestPath(edge WeightedEdge) (res float64, changed bool) { + res = edge.Weight + + if edge.Source != nil && edge.Source.ShortestPath != defaultWeight { + res += edge.Source.ShortestPath + } + + if edge.Dest == nil || edge.Dest.ShortestPath == defaultWeight || res < edge.Dest.ShortestPath { + return res, true } - return edge.Source.ShortestPath + edge.Weight + return edge.Dest.ShortestPath, false } // calculateDistances calculates the strongest strong distance of an edge from diff --git a/internal/graph/partition_test.go b/internal/graph/partition_test.go index c114e39..83c82b3 100644 --- a/internal/graph/partition_test.go +++ b/internal/graph/partition_test.go @@ -1,57 +1,184 @@ package graph -import "testing" +import ( + "testing" + + "github.com/google/go-cmp/cmp" +) + +func Test_bfs(t *testing.T) { + var tests = map[string]struct { + input map[string]*Node + want map[string]*Node + }{ + "node_two_changed_children_should_return_two_next-level_nodes": { + input: map[string]*Node{"a": &Node{ + ID: "a", + Edges: map[string]WeightedEdge{ + "b": { + Weight: 1.0, + Source: &Node{ID: "a", ShortestPath: 1.0}, + Dest: &Node{ID: "b", ShortestPath: 3.0, ShortestPaths: []float64{3.0}, Edges: map[string]WeightedEdge{}}, + }, + "c": { + Weight: 4.0, + Source: &Node{ID: "a", ShortestPath: 1.0}, + Dest: &Node{ID: "c", ShortestPath: 6.0, ShortestPaths: []float64{6.0}, Edges: map[string]WeightedEdge{}}, + }, + }, + ShortestPath: 1.0, + ShortestPaths: []float64{1.0}, + }}, + want: map[string]*Node{ + "b": &Node{ + ID: "b", + Edges: map[string]WeightedEdge{}, + ShortestPath: 2.0, + ShortestPaths: []float64{3.0, 2.0}, + }, + "c": &Node{ + ID: "c", + Edges: map[string]WeightedEdge{}, + ShortestPath: 5.0, + ShortestPaths: []float64{6.0, 5.0}, + }, + }, + }, + } + + for name, tt := range tests { + t.Run(name, func(t *testing.T) { + got := bfs(tt.input) + + if diff := cmp.Diff(got, tt.want); diff != "" { + t.Errorf("bfs() mismatch (-want +got):\n%s", diff) + } + }) + } +} func Test_shortestPath(t *testing.T) { type input struct { edge WeightedEdge } + type want struct { + shortest float64 + changed bool + } + tests := map[string]struct { input input - want float64 + want want }{ - "nil_source_node_with_edge_weight_2.0": { + "nil_source_node_with_edge_weight_less_than_current": { input: input{ edge: WeightedEdge{ Weight: 2.0, Source: nil, + Dest: &Node{ShortestPath: 4.0}, }, }, - want: 2.0, + want: want{ + shortest: 2.0, + changed: true, + }, }, - "source_node_with_default_weight_edge_weight_2.0": { + "nil_source_node_with_edge_weight_greater_than_current": { input: input{ edge: WeightedEdge{ Weight: 2.0, - Source: &Node{ - ShortestPath: defaultWeight, - }, + Source: nil, + Dest: &Node{ShortestPath: 1.0}, }, }, - want: 2.0, + want: want{ + shortest: 1.0, + changed: false, + }, }, - "source_node_with_shortest_path_of_2.0_edge_weight_2.0": { + "source_node_with_default_weight": { input: input{ edge: WeightedEdge{ Weight: 2.0, - Source: &Node{ - ShortestPath: 2.0, - }, + Source: &Node{ShortestPath: defaultWeight}, }, }, - want: 4.0, + want: want{ + shortest: 2.0, + changed: true, + }, + }, + + "nil_dest_node_new_shortest": { + input: input{ + edge: WeightedEdge{ + Weight: 2.0, + Source: &Node{ShortestPath: 1.0}, + Dest: nil, + }, + }, + want: want{ + shortest: 3.0, + changed: true, + }, + }, + + "dest_node_with_default_weight_force_new_shortest": { + input: input{ + edge: WeightedEdge{ + Weight: 2.0, + Source: &Node{ShortestPath: 3.0}, + Dest: &Node{ShortestPath: defaultWeight}, + }, + }, + want: want{ + shortest: 5.0, + changed: true, + }, + }, + + "new_shortest_path": { + input: input{ + edge: WeightedEdge{ + Weight: 2.0, + Source: &Node{ShortestPath: 2.0}, + Dest: &Node{ShortestPath: 5.0}, + }, + }, + want: want{ + shortest: 4.0, + changed: true, + }, + }, + + "equal_shortest_path": { + input: input{ + edge: WeightedEdge{ + Weight: 2.0, + Source: &Node{ShortestPath: 2.0}, + Dest: &Node{ShortestPath: 4.0}, + }, + }, + want: want{ + shortest: 4.0, + changed: false, + }, }, } for name, tt := range tests { t.Run(name, func(t *testing.T) { - got := shortestPath(tt.input.edge) + gotShortest, gotChanged := shortestPath(tt.input.edge) + + if gotShortest != tt.want.shortest { + t.Errorf("shortestPath() = %+v - want: %+v\n\tinput: %+v\n", gotShortest, tt.want.shortest, tt.input) + } - if got != tt.want { - t.Errorf("shortestPath() = %+v - want: %+v\n\tinput: %+v\n", got, tt.want, tt.input) + if gotChanged != tt.want.changed { + t.Errorf("shortestPath() = %+v - want: %+v\n\tinput: %+v\n", gotChanged, tt.want.changed, tt.input) } }) } From f24be6a253528dcecfa4788d7db66f820c0e208b Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Tue, 17 Dec 2019 07:18:34 -0500 Subject: [PATCH 35/37] internal/graph/node: use -1.0 as defaultWeight * remove deprecated fields for MinPathStrength * add field for keeping track of shortest paths distances to a node Signed-off-by: Colton McCurdy --- internal/graph/node.go | 19 +++++++++---------- internal/graph/node_test.go | 30 +++++++++++++++--------------- splitfile_test.go | 12 ++++++------ 3 files changed, 30 insertions(+), 31 deletions(-) diff --git a/internal/graph/node.go b/internal/graph/node.go index 3d3f59d..07f577a 100644 --- a/internal/graph/node.go +++ b/internal/graph/node.go @@ -4,7 +4,7 @@ import ( "errors" ) -const defaultWeight = -1 +const defaultWeight = -1.0 // WeightedEdge contains pointers to source and destination Nodes as well as // the assigned weight of the relationship. @@ -13,7 +13,6 @@ type WeightedEdge struct { Source, Dest *Node ConnectednessStrength float64 - MinPathStrengths []float64 // used across multiple graph traversals Partition bool Distance float64 @@ -27,19 +26,19 @@ type Node struct { Edges map[string]WeightedEdge Parents map[string]WeightedEdge // TODO: may be able to delete this now - MinPathStrength float64 - ShortestPathLen float64 // used for a single graph traversals + ShortestPath float64 + ShortestPaths []float64 } // NewNode creates a pointer to a new Node with ID, id, and initializes a map of Edges. func NewNode(id string, v interface{}) *Node { return &Node{ - ID: id, - Object: v, - Edges: make(map[string]WeightedEdge), - Parents: make(map[string]WeightedEdge), - MinPathStrength: defaultWeight, - ShortestPathLen: defaultWeight, + ID: id, + Object: v, + Edges: make(map[string]WeightedEdge), + Parents: make(map[string]WeightedEdge), + ShortestPath: defaultWeight, + ShortestPaths: make([]float64, 0), } } diff --git a/internal/graph/node_test.go b/internal/graph/node_test.go index f6914fb..7ffb9a8 100644 --- a/internal/graph/node_test.go +++ b/internal/graph/node_test.go @@ -7,21 +7,21 @@ import ( var ( nodeA = Node{ - ID: "a", - Object: nil, - Edges: map[string]WeightedEdge{}, - Parents: map[string]WeightedEdge{}, - MinPathStrength: 1.0, - ShortestPathLen: 1.0, + ID: "a", + Object: nil, + Edges: map[string]WeightedEdge{}, + Parents: map[string]WeightedEdge{}, + ShortestPath: 1.0, + ShortestPaths: []float64{1.0}, } nodeB = Node{ - ID: "b", - Object: nil, - Edges: map[string]WeightedEdge{}, - Parents: map[string]WeightedEdge{}, - MinPathStrength: 1.0, - ShortestPathLen: 1.0, + ID: "b", + Object: nil, + Edges: map[string]WeightedEdge{}, + Parents: map[string]WeightedEdge{}, + ShortestPath: 1.0, + ShortestPaths: []float64{1.0}, } ) @@ -48,9 +48,9 @@ func TestAddEdge(t *testing.T) { Dest: &nodeB, }, }, - Parents: map[string]WeightedEdge{}, - MinPathStrength: 1.0, - ShortestPathLen: 1.0, + Parents: map[string]WeightedEdge{}, + ShortestPath: 1.0, + ShortestPaths: []float64{1.0}, }, }, diff --git a/splitfile_test.go b/splitfile_test.go index f4a99b4..97633d5 100644 --- a/splitfile_test.go +++ b/splitfile_test.go @@ -15,12 +15,12 @@ import ( var ( emptyNode = graph.Node{ - ID: "node", - Object: nil, - Edges: make(map[string]graph.WeightedEdge), - Parents: make(map[string]graph.WeightedEdge), - MinPathStrength: 0.0, - ShortestPathLen: 0.0, + ID: "node", + Object: nil, + Edges: make(map[string]graph.WeightedEdge), + Parents: make(map[string]graph.WeightedEdge), + ShortestPath: -1.0, + ShortestPaths: make([]float64, 0), } ) From 3ae6e8272865f92ecd2c0198539814e3b842d17e Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Tue, 17 Dec 2019 07:34:27 -0500 Subject: [PATCH 36/37] internal/graph/partition: update bfs comment Signed-off-by: Colton McCurdy --- internal/graph/partition.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/internal/graph/partition.go b/internal/graph/partition.go index ad67e6b..f600f89 100644 --- a/internal/graph/partition.go +++ b/internal/graph/partition.go @@ -41,8 +41,7 @@ func Partition(g Graph, epsilon float64) []WeightedEdge { return identifyPartitions(dists, epsilon) } -// bfs does breadth-first search of the graph, keeping track -// of shortest path and the mininimum path strength (i.e., the weakest edge in a path). +// bfs does breadth-first search of the graph, keeping track of the current and past shortest paths. func bfs(level map[string]*Node) map[string]*Node { next := make(map[string]*Node) From f1e2a9ccabd89e47356910290ce20a83faf0d0f3 Mon Sep 17 00:00:00 2001 From: Colton McCurdy Date: Wed, 18 Dec 2019 08:22:21 -0500 Subject: [PATCH 37/37] WIP; need to write unit tests...enough of this manual checking Signed-off-by: Colton McCurdy --- internal/graph/partition.go | 9 +- splitfile_test.go | 205 +++++++++++++++++++----------------- 2 files changed, 113 insertions(+), 101 deletions(-) diff --git a/internal/graph/partition.go b/internal/graph/partition.go index f600f89..412eb08 100644 --- a/internal/graph/partition.go +++ b/internal/graph/partition.go @@ -34,6 +34,11 @@ func Partition(g Graph, epsilon float64) []WeightedEdge { for len(next) > 0 { next = bfs(curr) + + for id, n := range curr { + g[id] = n + } + curr = next } @@ -49,9 +54,10 @@ func bfs(level map[string]*Node) map[string]*Node { for _, childEdge := range node.Edges { p, changed := shortestPath(childEdge) + childEdge.Dest.ShortestPaths = append(childEdge.Dest.ShortestPaths, p) + if changed { childEdge.Dest.ShortestPath = p - childEdge.Dest.ShortestPaths = append(childEdge.Dest.ShortestPaths, p) next[childEdge.Dest.ID] = childEdge.Dest } } @@ -96,7 +102,6 @@ func calculateDistances(g Graph, edges []WeightedEdge) []WeightedEdge { res := make([]WeightedEdge, 0, len(edges)) for _, edge := range edges { - var sum float64 for _, sp := range edge.Dest.ShortestPaths { sum += sp diff --git a/splitfile_test.go b/splitfile_test.go index 97633d5..511c384 100644 --- a/splitfile_test.go +++ b/splitfile_test.go @@ -1,15 +1,6 @@ package splitfile import ( - "go/ast" - "go/importer" - "go/parser" - "go/token" - "go/types" - "log" - "reflect" - "testing" - "github.com/mccurdyc/splitfile/internal/graph" ) @@ -24,93 +15,109 @@ var ( } ) -func Test_addRelated(t *testing.T) { - var tests = []struct { - name string - graph graph.Graph - pkgpath string - files map[string]string - node *graph.Node - typeNameToCheck string - want graph.Graph - wantErr error - }{ - { - name: "type with no methods", - graph: make(graph.Graph), - pkgpath: "a", - files: map[string]string{"a/a.go": ` - package a - type a int - `}, - node: &emptyNode, - typeNameToCheck: "a", - want: make(graph.Graph), - wantErr: nil, - }, - - { - name: "type with a single method", - graph: make(graph.Graph), - pkgpath: "a", - files: map[string]string{"a/a.go": ` - package a - type a int - func (a a) Val() int { - return int(a) - } - `}, - node: &emptyNode, - typeNameToCheck: "a", - want: graph.Graph{ - "package a (\"a\") 49": &graph.Node{}, - "package a (\"a\") Val 43": &graph.Node{}, - "package a (\"a\") a 38": &graph.Node{}, - }, - wantErr: nil, - }, - } - - for _, tt := range tests { - tt := tt - t.Run(tt.name, func(t *testing.T) { - fset := token.NewFileSet() - var files []*ast.File - - for fname, fileContents := range tt.files { - f, err := parser.ParseFile(fset, fname, fileContents, 0) - if err != nil { - log.Fatal(err) - } - - files = append(files, f) - } - - conf := types.Config{Importer: importer.Default()} - pkg, err := conf.Check(tt.pkgpath, fset, files, nil) - if err != nil { - log.Fatal(err) - } - - tt.node.Object = pkg.Scope().Lookup(tt.typeNameToCheck) - - gotErr := addRelated(tt.graph, tt.node) - - if !reflect.DeepEqual(tt.want, tt.graph) { - t.Errorf("addRelated() mismatch:\n\twant: %+v\n\tgot: %+v", tt.want, tt.graph) - } - - // https://github.com/google/go-cmp/issues/24 - errorCmp := func(x, y error) bool { - if x == nil || y == nil { - return x == nil && y == nil - } - return x.Error() == y.Error() - } - - if ok := errorCmp(gotErr, tt.wantErr); !ok { - t.Errorf("addRelated() = %v, wantErr %v", gotErr, tt.wantErr) - } - }) - } -} +// func Test_addRelated(t *testing.T) { +// var tests = []struct { +// name string +// graph graph.Graph +// pkgpath string +// files map[string]string +// node *graph.Node +// typeNameToCheck string +// want graph.Graph +// wantErr error +// }{ +// { +// name: "type with no methods", +// graph: make(graph.Graph), +// pkgpath: "a", +// files: map[string]string{"a/a.go": ` +// package a +// type a int +// `}, +// node: &emptyNode, +// typeNameToCheck: "a", +// want: make(graph.Graph), +// wantErr: nil, +// }, +// +// { +// name: "type with a single method", +// graph: make(graph.Graph), +// pkgpath: "a", +// files: map[string]string{"a/a.go": ` +// package a +// type a int +// func (a a) Val() int { +// return int(a) +// } +// `}, +// node: &graph.Node{ +// ID: "package a (\"a\")", +// }, +// typeNameToCheck: "a", +// want: graph.Graph{ +// "package a (\"a\")": &graph.Node{}, +// "package a (\"a\") Val func() int": &graph.Node{ +// ID: "package a (\"a\") Val func() int", +// Object: nil, +// Edges: map[string]graph.WeightedEdge{}, +// Parents: map[string]graph.WeightedEdge{}, +// ShortestPath: -1.0, +// ShortestPaths: []float64{}, +// }, +// "package a (\"a\") a a.a": &graph.Node{ +// ID: "package a (\"a\") a a.a", +// Object: nil, +// Edges: map[string]graph.WeightedEdge{}, +// Parents: map[string]graph.WeightedEdge{}, +// ShortestPath: -1.0, +// ShortestPaths: []float64{}, +// }, +// }, +// wantErr: nil, +// }, +// } +// +// for _, tt := range tests { +// tt := tt +// t.Run(tt.name, func(t *testing.T) { +// fset := token.NewFileSet() +// var files []*ast.File +// +// for fname, fileContents := range tt.files { +// f, err := parser.ParseFile(fset, fname, fileContents, 0) +// if err != nil { +// log.Fatal(err) +// } +// +// files = append(files, f) +// } +// +// conf := types.Config{Importer: importer.Default()} +// pkg, err := conf.Check(tt.pkgpath, fset, files, nil) +// if err != nil { +// log.Fatal(err) +// } +// +// tt.node.Object = pkg.Scope().Lookup(tt.typeNameToCheck) +// +// gotErr := addRelated(tt.graph, tt.node) +// +// if !reflect.DeepEqual(tt.want, tt.graph) { +// t.Errorf("addRelated() mismatch:\n\twant: %+v\n\tgot: %+v", tt.want, tt.graph) +// } +// +// // https://github.com/google/go-cmp/issues/24 +// errorCmp := func(x, y error) bool { +// if x == nil || y == nil { +// return x == nil && y == nil +// } +// return x.Error() == y.Error() +// } +// +// if ok := errorCmp(gotErr, tt.wantErr); !ok { +// t.Errorf("addRelated() = %v, wantErr %v", gotErr, tt.wantErr) +// } +// }) +// } +// }