From 1b710c538e025aef899c3a09d89ceccdf49236ab Mon Sep 17 00:00:00 2001 From: Unnat Sharma Date: Wed, 14 May 2025 18:37:11 +0530 Subject: [PATCH 1/3] feat: introduction of control flow analysis for javascript files --- checkers/javascript/js_controlflow.go | 101 ++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 checkers/javascript/js_controlflow.go diff --git a/checkers/javascript/js_controlflow.go b/checkers/javascript/js_controlflow.go new file mode 100644 index 00000000..09afd20e --- /dev/null +++ b/checkers/javascript/js_controlflow.go @@ -0,0 +1,101 @@ +//globstar:registry-exclude + +package javascript + +import ( + sitter "github.com/smacker/go-tree-sitter" + "globstar.dev/analysis" +) + +var ControlFlowAnalyzer = &analysis.Analyzer{ + Name: "control_flow_analyzer", + Language: analysis.LangJs, + Description: "Create a Control Flow Graph for a javascript file", + Run: createControlFlowGraph, + Requires: []*analysis.Analyzer{ScopeAnalyzer}, +} + +type CFGNodeType int + +const ( + NodeTypeStatement CFGNodeType = iota + NodeTypeEntry // Marking entry point of a Basic Block + NodeTypeExit // Marking exit point of a Basic Block + NodeTypeFunction // Marking the start of a function node + NodeTypeFunctionCall // Marking a function call + NodeTypeReturn // Marking the return statement, or the end of a function +) + +// CFGNode is a node represeting a basic block in the control flow graph. +type CFGNode struct { + ID int + AstNode *sitter.Node + Type CFGNodeType + Successors []*CFGNode + Predecessors []*CFGNode + Scope *analysis.Scope + FunctionCtx *FunctionCFG // If the current node is a function declaration, link it to the individual Control Flow Node of the function. +} + +// FunctionCFG is a control flow graph for a function. 
+type FunctionCFG struct { + DeclarationNode *sitter.Node + Name string + EntryNode *CFGNode + ExitNodes []*CFGNode + Nodes []*CFGNode // All CFG nodes in this function +} + +type ControlFlowGraph struct { + FileEntryNode *CFGNode + FileExitNode *CFGNode + Functions map[*sitter.Node]*FunctionCFG + AllNodes map[int]*CFGNode + nextNodeID int +} + +func (cfg *ControlFlowGraph) CreateNode(node *sitter.Node, nodeType CFGNodeType, scope *analysis.Scope, functionCtx *FunctionCFG) *CFGNode { + cfgNode := &CFGNode{ + ID: cfg.nextNodeID, + AstNode: node, + Type: NodeTypeStatement, + Scope: scope, + FunctionCtx: functionCtx, + } + cfg.AllNodes[cfg.nextNodeID] = cfgNode + cfg.nextNodeID++ + + if functionCtx != nil { + functionCtx.Nodes = append(functionCtx.Nodes, cfgNode) + } + + return cfgNode +} + +func AddEdge(from *CFGNode, to *CFGNode) { + if from == nil || to == nil { + return + } + from.Successors = append(from.Successors, to) + to.Predecessors = append(to.Predecessors, from) +} + +// TODO: +// Focus on a simple control flow implementation first. +// Each function call creates a new control flow node +// Hoisting needs to be handled for function calls (ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Functions#function_hoisting) +// Hoisting only happens for function declarations, not function expressions. +// Gather all the function nodes prior to linking them. (A builder method?) 
+// Connecting the nodes will happen after the graph is fully created + +func createControlFlowGraph(pass *analysis.Pass) (interface{}, error) { + return nil, nil +} + +func collectFunctions(pass *analysis.Pass) (map[*sitter.Node]*FunctionCFG, error) { + return nil, nil +} + +func processFunctionDeclaration(cfg *ControlFlowGraph, node *sitter.Node) (*FunctionCFG, error) { + return nil, nil +} From a67e16ffd743ba2291d0504e5c66710661788d8b Mon Sep 17 00:00:00 2001 From: Unnat Sharma Date: Thu, 15 May 2025 23:01:11 +0530 Subject: [PATCH 2/3] feat: add basic call-graph generation for sequential statements and function calls + declarations --- checkers/javascript/js_controlflow.go | 426 +++++++++++++++++++-- checkers/javascript/js_controlflow_test.go | 30 ++ checkers/javascript/js_dataflow_test.go | 6 +- 3 files changed, 421 insertions(+), 41 deletions(-) create mode 100644 checkers/javascript/js_controlflow_test.go diff --git a/checkers/javascript/js_controlflow.go b/checkers/javascript/js_controlflow.go index 09afd20e..1e43e626 100644 --- a/checkers/javascript/js_controlflow.go +++ b/checkers/javascript/js_controlflow.go @@ -3,6 +3,10 @@ package javascript import ( + "fmt" + "slices" + "strings" + sitter "github.com/smacker/go-tree-sitter" "globstar.dev/analysis" ) @@ -15,64 +19,77 @@ var ControlFlowAnalyzer = &analysis.Analyzer{ Requires: []*analysis.Analyzer{ScopeAnalyzer}, } -type CFGNodeType int +type CfgBlockType int const ( - NodeTypeStatement CFGNodeType = iota - NodeTypeEntry // Marking entry point of a Basic Block - NodeTypeExit // Marking exit point of a Basic Block - NodeTypeFunction // Marking the start of a function node - NodeTypeFunctionCall // Marking a function call - NodeTypeReturn // Marking the return statement, or the end of a function + CfgBlockTypeStatement CfgBlockType = iota + CfgBlockTypeEntry // Marking entry point of a Basic Block + CfgBlockTypeExit // Marking exit point of a Basic Block + CfgBlockTypeFunction // Marking the start of a 
function node + CfgBlockTypeBasic // Marking a basic block ) +var BlockDeclNodes = []string{ + "function_declaration", + "if_statement", + "for_statement", +} + // CFGNode is a node represeting a basic block in the control flow graph. -type CFGNode struct { +type CfgBlock struct { ID int - AstNode *sitter.Node - Type CFGNodeType - Successors []*CFGNode - Predecessors []*CFGNode - Scope *analysis.Scope - FunctionCtx *FunctionCFG // If the current node is a function declaration, link it to the individual Control Flow Node of the function. + EnterNode *sitter.Node + Nodes []*sitter.Node + ExitNode *sitter.Node + Type CfgBlockType + Successors []*CfgBlock + Predecessors []*CfgBlock + FunctionCtx *FunctionCfgBlock // If the current node is a function declaration, link it to the individual Control Flow Node of the function. } -// FunctionCFG is a control flow graph for a function. -type FunctionCFG struct { +// FunctionCFGBlock is a control flow graph for a function. +type FunctionCfgBlock struct { DeclarationNode *sitter.Node Name string - EntryNode *CFGNode - ExitNodes []*CFGNode - Nodes []*CFGNode // All CFG nodes in this function + EntryNode *sitter.Node + ExitNodes []*sitter.Node + Nodes []*sitter.Node // All CFG nodes in this function } type ControlFlowGraph struct { - FileEntryNode *CFGNode - FileExitNode *CFGNode - Functions map[*sitter.Node]*FunctionCFG - AllNodes map[int]*CFGNode - nextNodeID int + FileEntryBlock *CfgBlock + FileExitBlock *CfgBlock + Functions map[string]*FunctionCfgBlock + AllBlocks map[int]*CfgBlock + nextNodeID int +} + +func NewControlFlowGraph() *ControlFlowGraph { + return &ControlFlowGraph{ + Functions: make(map[string]*FunctionCfgBlock), + AllBlocks: make(map[int]*CfgBlock), + nextNodeID: 0, + } } -func (cfg *ControlFlowGraph) CreateNode(node *sitter.Node, nodeType CFGNodeType, scope *analysis.Scope, functionCtx *FunctionCFG) *CFGNode { - cfgNode := &CFGNode{ +func (cfg *ControlFlowGraph) CreateBlock(node *sitter.Node, nodeType 
CfgBlockType, functionCtx *FunctionCfgBlock) (int, *CfgBlock) { + cfgNode := &CfgBlock{ ID: cfg.nextNodeID, - AstNode: node, - Type: NodeTypeStatement, - Scope: scope, + EnterNode: node, + Type: nodeType, FunctionCtx: functionCtx, } - cfg.AllNodes[cfg.nextNodeID] = cfgNode + cfg.AllBlocks[cfg.nextNodeID] = cfgNode cfg.nextNodeID++ if functionCtx != nil { - functionCtx.Nodes = append(functionCtx.Nodes, cfgNode) + functionCtx.Nodes = append(functionCtx.Nodes, node) } - return cfgNode + return cfg.nextNodeID - 1, cfgNode } -func AddEdge(from *CFGNode, to *CFGNode) { +func AddEdge(from *CfgBlock, to *CfgBlock) { if from == nil || to == nil { return } @@ -87,15 +104,348 @@ func AddEdge(from *CFGNode, to *CFGNode) { // Hoisting only happens for function declarations, not function expressions. // Gather all the function nodes prior to linking them. (A builder method?) // Connecting the nodes will happen after the graph is fully created +// Add functionality to handle function calls and hoisting. func createControlFlowGraph(pass *analysis.Pass) (interface{}, error) { - return nil, nil + cfg := NewControlFlowGraph() + err := cfg.collectFunctions(pass) + + if err != nil { + return nil, err + } + + analysis.Preorder(pass, func(node *sitter.Node) { + if node == nil { + return + } + + if node.Type() == "program" { + if node.ChildCount() < 0 { + return + } + cfg.CreateBlock(node, CfgBlockTypeEntry, nil) + + children := int(node.ChildCount()) + for i := 0; i < children; i++ { + child := node.Child(i) + fmt.Println("child", child.Type()) + if slices.Contains(BlockDeclNodes, child.Type()) { + continue + // Need a way to only attach the function decl block, when it is called in the actual code. + // Right now it's part of the CFG even if it's not called. 
+ // if child.Type() == "function_declaration" { + // block := cfg.Functions[child.ChildByFieldName("name").Content(pass.FileContext.Source)] + // funcBlockIndex, _ := cfg.CreateBlock(child, CfgBlockTypeFunction, block) + // AddEdge(cfg.AllBlocks[funcBlockIndex-1], cfg.AllBlocks[funcBlockIndex]) + // } + } + if child.Type() == "expression_statement" { + callExp := child.Child(0) + if callExp == nil { + continue + } + fmt.Println("callExp", child.Child(0).Child(0).Type()) + funcNameStr := callExp.ChildByFieldName("function").Content(pass.FileContext.Source) + block := cfg.Functions[funcNameStr] + fmt.Println("block", block) + if block == nil { + continue + } + funcBlockIndex, _ := cfg.CreateBlock(block.DeclarationNode, CfgBlockTypeFunction, block) + AddEdge(cfg.AllBlocks[funcBlockIndex-1], cfg.AllBlocks[funcBlockIndex]) + continue + } + basicBlockID, basicBlock := cfg.CreateBlock(child, CfgBlockTypeBasic, nil) + AddEdge(cfg.AllBlocks[basicBlockID-1], basicBlock) + + j := i + for j < children && !slices.Contains(BlockDeclNodes, node.Child(j).Type()) { + basicBlock.Nodes = append(basicBlock.Nodes, node.Child(j)) + j++ + } + i = j - 1 // -1 because the outer loop will increment i + + } + exitBlockID, exitBlock := cfg.CreateBlock(nil, CfgBlockTypeExit, nil) + AddEdge(cfg.AllBlocks[exitBlockID-1], exitBlock) + } else { + return + } + + }) + + // For debugging, you can uncomment this line to see the CFG immediately: + fmt.Println(cfg.GenerateDOTWithSource(pass.FileContext.Source)) + + return cfg, nil } -func collectFunctions(pass *analysis.Pass) (map[*sitter.Node]*FunctionCFG, error) { - return nil, nil +func (cfg *ControlFlowGraph) collectFunctions(pass *analysis.Pass) error { + functions := make(map[string]*FunctionCfgBlock) + + analysis.Preorder(pass, func(node *sitter.Node) { + if node.Type() == "function_declaration" { + funcName := node.ChildByFieldName("name") + cfgNode, err := cfg.processFunctionDeclaration(pass, node) + if err != nil { + return + } + 
functions[funcName.Content(pass.FileContext.Source)] = cfgNode + } + }) + cfg.Functions = functions + + return nil } -func processFunctionDeclaration(cfg *ControlFlowGraph, node *sitter.Node) (*FunctionCFG, error) { - return nil, nil +func (cfg *ControlFlowGraph) processFunctionDeclaration(pass *analysis.Pass, node *sitter.Node) (*FunctionCfgBlock, error) { + if node.Type() != "function_declaration" { + return nil, nil + } + + funcName := node.ChildByFieldName("name") + if funcName == nil { + return nil, fmt.Errorf("function declaration has no name") + } + + cfgNode := &FunctionCfgBlock{ + DeclarationNode: node, + Name: funcName.Content(pass.FileContext.Source), + EntryNode: node, + ExitNodes: make([]*sitter.Node, 0), + } + body := node.ChildByFieldName("body") + bodyChildCount := body.NamedChildCount() + for i := 0; i < int(bodyChildCount); i++ { + child := body.NamedChild(i) + if child.Type() == "return_statement" { + cfgNode.ExitNodes = append(cfgNode.ExitNodes, child) + } else { + cfgNode.Nodes = append(cfgNode.ExitNodes, child) + } + } + // cfg.CreateBlock(node, CfgBlockTypeFunction, cfgNode) + + return cfgNode, nil +} + +// Print outputs the entire Control Flow Graph structure in a readable format +func (cfg *ControlFlowGraph) Print() string { + var result string + + result += "Control Flow Graph:\n" + result += fmt.Sprintf("Total Blocks: %d\n\n", len(cfg.AllBlocks)) + + // Print all blocks in order + for i := 0; i < len(cfg.AllBlocks); i++ { + block := cfg.AllBlocks[i] + if block == nil { + continue + } + + // Block header + result += fmt.Sprintf("Block ID: %d (Type: %s)\n", block.ID, blockTypeToString(block.Type)) + + // Show node type + if block.EnterNode != nil { + result += fmt.Sprintf(" Enter Node: %s\n", block.EnterNode.Type()) + } + + // List child nodes + if len(block.Nodes) > 0 { + result += " Nodes:\n" + for _, node := range block.Nodes { + result += fmt.Sprintf(" - %s\n", node.Type()) + } + } + + // Show function context if applicable + if 
block.FunctionCtx != nil { + result += fmt.Sprintf(" Function: %s\n", block.FunctionCtx.Name) + } + + // Show edges (connections between blocks) + if len(block.Predecessors) > 0 { + result += " Predecessors:" + for _, pred := range block.Predecessors { + result += fmt.Sprintf(" %d", pred.ID) + } + result += "\n" + } + + if len(block.Successors) > 0 { + result += " Successors:" + for _, succ := range block.Successors { + result += fmt.Sprintf(" %d", succ.ID) + } + result += "\n" + } + + result += "\n" + } + + // Print functions + if len(cfg.Functions) > 0 { + result += "Functions:\n" + for _, function := range cfg.Functions { + result += fmt.Sprintf(" %s:\n", function.Name) + result += fmt.Sprintf(" Entry: %s\n", function.EntryNode.Type()) + + if len(function.ExitNodes) > 0 { + result += " Exit Nodes:\n" + for _, exitNode := range function.ExitNodes { + result += fmt.Sprintf(" - %s\n", exitNode.Type()) + } + } + + result += "\n" + } + } + + return result +} + +// Helper function to convert block type to string representation +func blockTypeToString(blockType CfgBlockType) string { + switch blockType { + case CfgBlockTypeStatement: + return "Statement" + case CfgBlockTypeEntry: + return "Entry" + case CfgBlockTypeExit: + return "Exit" + case CfgBlockTypeFunction: + return "Function" + case CfgBlockTypeBasic: + return "Basic" + default: + return "Unknown" + } +} + +// GenerateDOT creates a DOT graph representation of the CFG for visualization +func (cfg *ControlFlowGraph) GenerateDOT() string { + return cfg.GenerateDOTWithSource(nil) +} + +// GenerateDOTWithSource creates a DOT graph representation of the CFG including source code snippets +func (cfg *ControlFlowGraph) GenerateDOTWithSource(source []byte) string { + var result string + + // Start DOT graph + result += "digraph CFG {\n" + result += " node [shape=box];\n" + + // Define nodes + for _, block := range cfg.AllBlocks { + if block == nil { + continue + } + + nodeLabel := fmt.Sprintf("Block %d\\n%s", block.ID, 
blockTypeToString(block.Type)) + + // Add node type info + if block.EnterNode != nil { + nodeLabel += fmt.Sprintf("\\n%s", block.EnterNode.Type()) + } + + // Add function context if applicable + if block.FunctionCtx != nil { + nodeLabel += fmt.Sprintf("\\nFunction: %s", block.FunctionCtx.Name) + } + + // Add all nodes in this block with their statement representation + if len(block.Nodes) > 0 { + nodeLabel += "\\n\\nStatements:" + for _, node := range block.Nodes { + // Add node type + nodeLabel += fmt.Sprintf("\\n- %s", node.Type()) + + // Add code snippet if source code is available + if source != nil && node.StartByte() < uint32(len(source)) && node.EndByte() <= uint32(len(source)) { + snippet := string(source[node.StartByte():node.EndByte()]) + + // Clean the snippet for DOT format + snippet = escapeForDot(snippet) + + // Truncate if too long + if len(snippet) > 30 { + snippet = snippet[:27] + "..." + } + + nodeLabel += fmt.Sprintf(": %s", snippet) + } + } + } + + // Node style based on type + nodeStyle := "" + switch block.Type { + case CfgBlockTypeEntry: + nodeStyle = ", color=green" + case CfgBlockTypeExit: + nodeStyle = ", color=red" + case CfgBlockTypeFunction: + nodeStyle = ", color=blue, style=filled, fillcolor=lightblue" + } + + result += fmt.Sprintf(" node%d [label=\"%s\"%s];\n", block.ID, nodeLabel, nodeStyle) + } + + // Define edges + for _, block := range cfg.AllBlocks { + if block == nil { + continue + } + + for _, succ := range block.Successors { + result += fmt.Sprintf(" node%d -> node%d;\n", block.ID, succ.ID) + } + } + + // End DOT graph + result += "}\n" + + return result +} + +// escapeForDot escapes special characters in strings for DOT format +func escapeForDot(s string) string { + // Replace newlines with \n + s = strings.ReplaceAll(s, "\n", "\\n") + + // Replace quotes with escaped quotes + s = strings.ReplaceAll(s, "\"", "\\\"") + + // Replace backslashes with escaped backslashes + s = strings.ReplaceAll(s, "\\", "\\\\") + + return s +} 
+ +// PrintCFG is a utility function that can be used to print a CFG from analyzer results +func PrintCFG(result interface{}) string { + cfg, ok := result.(*ControlFlowGraph) + if !ok { + return "Error: Result is not a ControlFlowGraph" + } + return cfg.Print() +} + +// PrintCFGDOT returns the DOT representation of the CFG from analyzer results +func PrintCFGDOT(result interface{}) string { + cfg, ok := result.(*ControlFlowGraph) + if !ok { + return "Error: Result is not a ControlFlowGraph" + } + return cfg.GenerateDOT() +} + +// PrintCFGDOTWithSource returns the DOT representation with source code snippets +func PrintCFGDOTWithSource(result interface{}, source []byte) string { + cfg, ok := result.(*ControlFlowGraph) + if !ok { + return "Error: Result is not a ControlFlowGraph" + } + return cfg.GenerateDOTWithSource(source) } diff --git a/checkers/javascript/js_controlflow_test.go b/checkers/javascript/js_controlflow_test.go new file mode 100644 index 00000000..cb63b90a --- /dev/null +++ b/checkers/javascript/js_controlflow_test.go @@ -0,0 +1,30 @@ +package javascript + +import ( + "testing" + + "github.com/stretchr/testify/require" + ana "globstar.dev/analysis" +) + +func TestControlFlowGraph(t *testing.T) { + source := ` + let a = 21; + let b = 22; + f(a); + function f(x) { + let y = x + 1; + return y; + } + + f(a); + ` + parseResult := ParseJsCode(t, []byte(source)) + pass := &ana.Pass{ + Analyzer: ControlFlowAnalyzer, + FileContext: parseResult, + } + + _, err := createControlFlowGraph(pass) + require.NoError(t, err) +} diff --git a/checkers/javascript/js_dataflow_test.go b/checkers/javascript/js_dataflow_test.go index 1848dd4a..c04faf3c 100644 --- a/checkers/javascript/js_dataflow_test.go +++ b/checkers/javascript/js_dataflow_test.go @@ -8,7 +8,7 @@ import ( ana "globstar.dev/analysis" ) -func parseJsCode(t *testing.T, source []byte) *ana.ParseResult { +func ParseJsCode(t *testing.T, source []byte) *ana.ParseResult { pass, err := ana.Parse("", source, 
ana.LangJs, ana.LangJs.Grammar()) require.NoError(t, err) @@ -144,7 +144,7 @@ func TestDataFlowAnalysis(t *testing.T) { f(x) ` - parseResult := parseJsCode(t, []byte(source)) + parseResult := ParseJsCode(t, []byte(source)) pass := &ana.Pass{ Analyzer: DataFlowAnalyzer, FileContext: parseResult, @@ -187,7 +187,7 @@ func TestClassDataFlow(t *testing.T) { ` - parseResult := parseJsCode(t, []byte(source)) + parseResult := ParseJsCode(t, []byte(source)) pass := &ana.Pass{ Analyzer: DataFlowAnalyzer, FileContext: parseResult, From f861af9437282e250ad8dd1ccf9bd66567c7a75e Mon Sep 17 00:00:00 2001 From: Unnat Sharma Date: Fri, 16 May 2025 20:09:19 +0530 Subject: [PATCH 3/3] feat: add functionality to handle conditional nodes in the cfg --- checkers/javascript/js_controlflow.go | 105 ++++++++++++++++----- checkers/javascript/js_controlflow_test.go | 41 ++++++-- 2 files changed, 111 insertions(+), 35 deletions(-) diff --git a/checkers/javascript/js_controlflow.go b/checkers/javascript/js_controlflow.go index 1e43e626..918c9223 100644 --- a/checkers/javascript/js_controlflow.go +++ b/checkers/javascript/js_controlflow.go @@ -22,11 +22,12 @@ var ControlFlowAnalyzer = &analysis.Analyzer{ type CfgBlockType int const ( - CfgBlockTypeStatement CfgBlockType = iota - CfgBlockTypeEntry // Marking entry point of a Basic Block - CfgBlockTypeExit // Marking exit point of a Basic Block - CfgBlockTypeFunction // Marking the start of a function node - CfgBlockTypeBasic // Marking a basic block + CfgBlockTypeStatement CfgBlockType = iota + CfgBlockTypeEntry // Marking entry point of a Basic Block + CfgBlockTypeExit // Marking exit point of a Basic Block + CfgBlockTypeFunction // Marking the start of a function node + CfgBlockTypeBasic // Marking a basic block + CfgBlockTypeConditional // Marking a conditional block ) var BlockDeclNodes = []string{ @@ -98,13 +99,16 @@ func AddEdge(from *CfgBlock, to *CfgBlock) { } // TODO: -// Focus on a simple control flow implementation first. 
-// Each function call creates a new control flow node -// Hoisting needs to be handled for function calls (ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Functions#function_hoisting) -// Hoisting only happens for function declarations, not function expressions. -// Gather all the function nodes prior to linking them. (A builder method?) -// Connecting the nodes will happen after the graph is fully created -// Add functionality to handle function calls and hoisting. +// Focus on a simple control flow implementation first. ✅ +// Each function call creates a new control flow node ✅ +// Hoisting needs to be handled for function calls (ref: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Functions#function_hoisting) ✅ +// Hoisting only happens for function declarations, not function expressions. ✅ +// Gather all the function nodes prior to linking them. (A builder method?) ✅ +// Connecting the nodes will happen after the graph is fully created ✅ +// Add functionality to handle function calls and hoisting. ✅ +// Handle Conditional Expressions. ✅ +// Handle Loops. +// Handle Conditions inside functions. func createControlFlowGraph(pass *analysis.Pass) (interface{}, error) { cfg := NewControlFlowGraph() @@ -128,26 +132,20 @@ func createControlFlowGraph(pass *analysis.Pass) (interface{}, error) { children := int(node.ChildCount()) for i := 0; i < children; i++ { child := node.Child(i) - fmt.Println("child", child.Type()) if slices.Contains(BlockDeclNodes, child.Type()) { + switch child.Type() { + case "if_statement": + cfg.processIfStatement(cfg.nextNodeID-1 /*pass,*/, child) + } continue - // Need a way to only attach the function decl block, when it is called in the actual code. - // Right now it's part of the CFG even if it's not called. 
- // if child.Type() == "function_declaration" { - // block := cfg.Functions[child.ChildByFieldName("name").Content(pass.FileContext.Source)] - // funcBlockIndex, _ := cfg.CreateBlock(child, CfgBlockTypeFunction, block) - // AddEdge(cfg.AllBlocks[funcBlockIndex-1], cfg.AllBlocks[funcBlockIndex]) - // } } if child.Type() == "expression_statement" { callExp := child.Child(0) if callExp == nil { continue } - fmt.Println("callExp", child.Child(0).Child(0).Type()) funcNameStr := callExp.ChildByFieldName("function").Content(pass.FileContext.Source) block := cfg.Functions[funcNameStr] - fmt.Println("block", block) if block == nil { continue } @@ -224,11 +222,55 @@ func (cfg *ControlFlowGraph) processFunctionDeclaration(pass *analysis.Pass, nod cfgNode.Nodes = append(cfgNode.ExitNodes, child) } } - // cfg.CreateBlock(node, CfgBlockTypeFunction, cfgNode) return cfgNode, nil } +func (cfg *ControlFlowGraph) processIfStatement(lastBlockID int /*pass *analysis.Pass,*/, node *sitter.Node) (int, *CfgBlock, error) { + // Need separate blocks. Separating the branches of the if statements (true and false) ✅ + // Need to handle the else if and else statements. ✅ + // Link all the leaf nodes to the exit block. How about we return the indices of all the leaf nodes and link them in the main loop? 
+ + condition := node.ChildByFieldName("condition") + consequent := node.ChildByFieldName("consequence") + elseConditions := node.ChildByFieldName("alternative") + + ifBlockID, ifBlock := cfg.CreateBlock(node, CfgBlockTypeConditional, nil) + if condition != nil { + ifBlock.Nodes = append(ifBlock.Nodes, condition) + } + AddEdge(cfg.AllBlocks[lastBlockID], ifBlock) + + if consequent != nil { + _, trueBlock := cfg.CreateBlock(consequent, CfgBlockTypeBasic, nil) + trueBlock.Nodes = append(trueBlock.Nodes, consequent) + AddEdge(cfg.AllBlocks[ifBlockID], trueBlock) + } + + if elseConditions != nil { + if int(elseConditions.ChildCount()) < 2 { + return ifBlockID, ifBlock, nil + } + switch elseConditions.Child(1).Type() { + case "if_statement": + cfg.processIfStatement(ifBlockID, elseConditions.Child(1)) + default: + fmt.Println("else_clause", elseConditions.Child(1)) + elseBlockID, elseBlock := cfg.CreateBlock(elseConditions, CfgBlockTypeBasic, nil) + // elseBlock.Nodes = append(elseBlock.Nodes, elseConditions) + AddEdge(cfg.AllBlocks[ifBlockID], elseBlock) + _, falseBlock := cfg.CreateBlock(elseConditions, CfgBlockTypeBasic, nil) + if elseConditions.Child(1) != nil { + falseBlock.Nodes = append(falseBlock.Nodes, elseConditions.Child(1)) + } + AddEdge(cfg.AllBlocks[elseBlockID], falseBlock) + } + + } + + return ifBlockID, ifBlock, nil +} + // Print outputs the entire Control Flow Graph structure in a readable format func (cfg *ControlFlowGraph) Print() string { var result string @@ -318,6 +360,8 @@ func blockTypeToString(blockType CfgBlockType) string { return "Function" case CfgBlockTypeBasic: return "Basic" + case CfgBlockTypeConditional: + return "Conditional" default: return "Unknown" } @@ -358,6 +402,10 @@ func (cfg *ControlFlowGraph) GenerateDOTWithSource(source []byte) string { if len(block.Nodes) > 0 { nodeLabel += "\\n\\nStatements:" for _, node := range block.Nodes { + if node == nil { + continue + } + // Add node type nodeLabel += fmt.Sprintf("\\n- %s", 
node.Type()) @@ -387,6 +435,8 @@ func (cfg *ControlFlowGraph) GenerateDOTWithSource(source []byte) string { nodeStyle = ", color=red" case CfgBlockTypeFunction: nodeStyle = ", color=blue, style=filled, fillcolor=lightblue" + case CfgBlockTypeConditional: + nodeStyle = ", color=purple, style=filled, fillcolor=lavender" } result += fmt.Sprintf(" node%d [label=\"%s\"%s];\n", block.ID, nodeLabel, nodeStyle) @@ -399,6 +449,9 @@ func (cfg *ControlFlowGraph) GenerateDOTWithSource(source []byte) string { } for _, succ := range block.Successors { + if succ == nil { + continue + } result += fmt.Sprintf(" node%d -> node%d;\n", block.ID, succ.ID) } } @@ -411,15 +464,15 @@ func (cfg *ControlFlowGraph) GenerateDOTWithSource(source []byte) string { // escapeForDot escapes special characters in strings for DOT format func escapeForDot(s string) string { + // Replace backslashes first to avoid double-escaping + s = strings.ReplaceAll(s, "\\", "\\\\") + // Replace newlines with \n s = strings.ReplaceAll(s, "\n", "\\n") // Replace quotes with escaped quotes s = strings.ReplaceAll(s, "\"", "\\\"") - // Replace backslashes with escaped backslashes - s = strings.ReplaceAll(s, "\\", "\\\\") - return s } diff --git a/checkers/javascript/js_controlflow_test.go b/checkers/javascript/js_controlflow_test.go index cb63b90a..fe617933 100644 --- a/checkers/javascript/js_controlflow_test.go +++ b/checkers/javascript/js_controlflow_test.go @@ -8,7 +8,8 @@ import ( ) func TestControlFlowGraph(t *testing.T) { - source := ` + t.Run("function_calls", func(t *testing.T) { + source := ` let a = 21; let b = 22; f(a); @@ -19,12 +20,34 @@ func TestControlFlowGraph(t *testing.T) { f(a); ` - parseResult := ParseJsCode(t, []byte(source)) - pass := &ana.Pass{ - Analyzer: ControlFlowAnalyzer, - FileContext: parseResult, - } - - _, err := createControlFlowGraph(pass) - require.NoError(t, err) + parseResult := ParseJsCode(t, []byte(source)) + pass := &ana.Pass{ + Analyzer: ControlFlowAnalyzer, + FileContext: 
parseResult, + } + + _, err := createControlFlowGraph(pass) + require.NoError(t, err) + }) + + t.Run("if_statements", func(t *testing.T) { + source := ` + if (a > b) { + console.log("a is greater than b"); + }else if(a