From a57b179e4145f3561d7446ad34b5b8fb4f7c5e58 Mon Sep 17 00:00:00 2001 From: Aadhav Date: Tue, 25 Feb 2025 21:35:30 +0530 Subject: [PATCH 1/3] introduce custom analyzers for enhanced analysis Signed-off-by: burntcarrot --- .../python/security_hardcoded_tokens.test.py | 30 ++++++++++++++++ checkers/python/security_hardcoded_tokens.yml | 32 +++++++++++++++++ pkg/analysis/analyze.go | 35 +++++++++++++++++++ pkg/analysis/analyzers/entropy.go | 32 +++++++++++++++++ pkg/analysis/pattern_rule.go | 23 +++++++++--- 5 files changed, 147 insertions(+), 5 deletions(-) create mode 100644 checkers/python/security_hardcoded_tokens.test.py create mode 100644 checkers/python/security_hardcoded_tokens.yml create mode 100644 pkg/analysis/analyzers/entropy.go diff --git a/checkers/python/security_hardcoded_tokens.test.py b/checkers/python/security_hardcoded_tokens.test.py new file mode 100644 index 00000000..006ba7d8 --- /dev/null +++ b/checkers/python/security_hardcoded_tokens.test.py @@ -0,0 +1,30 @@ +import os + +def main(): + # These should be flagged + # + connect(host="example.com", token="hqd#18ey283y28wdbbcwbd1ueh1ue2h") + # + set_password(password="A3b$c8d!eF9gHiJkLmNoPqRsTuVwXyZ") + # + configure(key="AKIATESTKEYTESTKEYTESTKEYTEST", secret="TEST/SECRET/KEY/EXAMPLE/1234567890") + + # These should NOT be flagged + # + set_password(password="password123") # Low entropy + # + configure(username="test_user", value=42) # Not a sensitive argument + + # Should not flag non-string values + # + set_token(token=os.getenv("API_TOKEN")) + + # Should not flag commented out code + # + # connect(host="example.com", token="commented_out_secret") + + # Edge cases + # + empty_string(arg="") # Empty string + # + numeric_value(key=12345) # Not a string \ No newline at end of file diff --git a/checkers/python/security_hardcoded_tokens.yml b/checkers/python/security_hardcoded_tokens.yml new file mode 100644 index 00000000..92b180fe --- /dev/null +++ b/checkers/python/security_hardcoded_tokens.yml @@ -0,0 +1,32 @@ +language: python +name: security_hardcoded_tokens +message: "Potential hardcoded credential detected" +category: security +severity: warning + +pattern: | + ((call + function: (identifier) @func_name + arguments: (argument_list + (keyword_argument + name: (identifier) @arg_name + value: (string + (string_content) @token_val + ) + ) + ) + ) + (#match? @arg_name "^(token|secret|key|password)$")) @security_hardcoded_tokens + +analyzers: + - name: entropy + capture: "token_val" + config: + min: 3.5 + +exclude: + - "test/**" + - "**/*_test.py" + +description: | + Hardcoded credentials pose a significant security risk. Use secure secret management systems instead of embedding secrets directly in code. \ No newline at end of file diff --git a/pkg/analysis/analyze.go b/pkg/analysis/analyze.go index b0716420..283a50fe 100644 --- a/pkg/analysis/analyze.go +++ b/pkg/analysis/analyze.go @@ -6,6 +6,7 @@ import ( "path/filepath" sitter "github.com/smacker/go-tree-sitter" + "globstar.dev/pkg/analysis/analyzers" "globstar.dev/pkg/config" ) @@ -302,3 +303,37 @@ func (ana *Analyzer) runPatternRules() { func (ana *Analyzer) Report(issue *Issue) { ana.issuesRaised = append(ana.issuesRaised, issue) } + +func (ana *Analyzer) runAnalyzers(defs []AnalyzerDef, captures []sitter.QueryCapture, matchedQuery *sitter.Query) bool { + for _, def := range defs { + var content string + for _, capture := range captures { + if ana.ParseResult.Source == nil { + continue + } + if capture.Node.StartByte() >= uint32(len(ana.ParseResult.Source)) || + capture.Node.EndByte() > uint32(len(ana.ParseResult.Source)) { + continue + } + captureName := matchedQuery.CaptureNameForId(capture.Index) + if captureName == def.Capture { + content = string(ana.ParseResult.Source[capture.Node.StartByte():capture.Node.EndByte()]) + break + } + } + + if content == "" { + return false + } + + switch def.Name { + case "entropy": + if !analyzers.EntropyAnalyzer(content, def.Config) { + return false + } + default: + return false + } + } + return true +} diff --git a/pkg/analysis/analyzers/entropy.go b/pkg/analysis/analyzers/entropy.go new file mode 100644 index 00000000..f1d71299 --- /dev/null +++ b/pkg/analysis/analyzers/entropy.go @@ -0,0 +1,32 @@ +package analyzers + +import ( + "math" +) + +// EntropyAnalyzer checks if the entropy of the given content is at least the minimum entropy. +// Returns true if the content passes the check, false otherwise. +func EntropyAnalyzer(content string, config map[string]interface{}) bool { + minEntropy := 3.0 // default + if val, ok := config["min"].(float64); ok { + minEntropy = val + } + + entropy := calculateShannonEntropy(content) + return entropy >= minEntropy +} + +func calculateShannonEntropy(s string) float64 { + var entropy float64 + counts := make(map[rune]int) + for _, r := range s { + counts[r]++ + } + + l := float64(len(s)) + for _, cnt := range counts { + f := float64(cnt) / l + entropy -= f * math.Log2(f) + } + return entropy +} diff --git a/pkg/analysis/pattern_rule.go b/pkg/analysis/pattern_rule.go index 4f9988b1..45886a34 100644 --- a/pkg/analysis/pattern_rule.go +++ b/pkg/analysis/pattern_rule.go @@ -67,6 +67,7 @@ type patternRuleImpl struct { severity config.Severity pathFilter *PathFilter filters []NodeFilter + analyzerDefs []AnalyzerDef } func (r *patternRuleImpl) Language() Language { @@ -83,6 +84,10 @@ func (r *patternRuleImpl) OnMatch( matchedNode *sitter.Node, captures []sitter.QueryCapture, ) { + // Run analyzers first + if !ana.runAnalyzers(r.analyzerDefs, captures, matchedQuery) { + return + } // replace all '@' with the corresponding capture value message := r.issueMessage @@ -151,11 +156,12 @@ type filterYAML struct { } type PatternRuleFile struct { - Language string `yaml:"language"` - Code string `yaml:"name"` - Message string `yaml:"message"` - Category config.Category `yaml:"category"` - Severity config.Severity `yaml:"severity"` + Language string `yaml:"language"` + Code string `yaml:"name"` + Message string `yaml:"message"` + Category config.Category `yaml:"category"` + Severity config.Severity `yaml:"severity"` + Analyzers []AnalyzerDef `yaml:"analyzers,omitempty"` // Pattern is a single pattern in the form of: // pattern: (some_pattern) // in the YAML file @@ -170,6 +176,12 @@ type PatternRuleFile struct { Include []string `yaml:"include,omitempty"` } +type AnalyzerDef struct { + Name string `yaml:"name"` + Capture string `yaml:"capture"` + Config map[string]interface{} `yaml:"config,omitempty"` +} + // DecodeLanguage converts a stringified language name to its corresponding // Language enum func DecodeLanguage(language string) Language { @@ -349,6 +361,7 @@ func ReadFromBytes(fileContent []byte) (YmlRule, error) { issueId: rule.Code, pathFilter: pathFilter, filters: filters, + analyzerDefs: rule.Analyzers, } return patternRule, nil From 59523d8b4bcf88329ad9f3fc246cac57894b9a8b Mon Sep 17 00:00:00 2001 From: burntcarrot Date: Tue, 25 Feb 2025 21:42:12 +0530 Subject: [PATCH 2/3] cleanup Signed-off-by: burntcarrot --- pkg/analysis/analyzers/entropy.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/analysis/analyzers/entropy.go b/pkg/analysis/analyzers/entropy.go index f1d71299..faab6ada 100644 --- a/pkg/analysis/analyzers/entropy.go +++ b/pkg/analysis/analyzers/entropy.go @@ -7,7 +7,7 @@ import ( // EntropyAnalyzer checks if the entropy of the given content is at least the minimum entropy. // Returns true if the content passes the check, false otherwise. func EntropyAnalyzer(content string, config map[string]interface{}) bool { - minEntropy := 3.0 // default + minEntropy := 3.0 if val, ok := config["min"].(float64); ok { minEntropy = val } From 708cc1eb3665c664901a2dc2d7b10fdb99fa1f47 Mon Sep 17 00:00:00 2001 From: burntcarrot Date: Tue, 25 Feb 2025 21:54:24 +0530 Subject: [PATCH 3/3] rename rules --- ..._hardcoded_tokens.test.py => hardcoded_credential.test.py} | 0 ...security_hardcoded_tokens.yml => hardcoded_credential.yml} | 4 ++-- 2 files changed, 2 insertions(+), 2 deletions(-) rename checkers/python/{security_hardcoded_tokens.test.py => hardcoded_credential.test.py} (100%) rename checkers/python/{security_hardcoded_tokens.yml => hardcoded_credential.yml} (84%) diff --git a/checkers/python/security_hardcoded_tokens.test.py b/checkers/python/hardcoded_credential.test.py similarity index 100% rename from checkers/python/security_hardcoded_tokens.test.py rename to checkers/python/hardcoded_credential.test.py diff --git a/checkers/python/security_hardcoded_tokens.yml b/checkers/python/hardcoded_credential.yml similarity index 84% rename from checkers/python/security_hardcoded_tokens.yml rename to checkers/python/hardcoded_credential.yml index 92b180fe..9d55a4e3 100644 --- a/checkers/python/security_hardcoded_tokens.yml +++ b/checkers/python/hardcoded_credential.yml @@ -1,5 +1,5 @@ language: python -name: security_hardcoded_tokens +name: hardcoded_credential message: "Potential hardcoded credential detected" category: security severity: warning @@ -16,7 +16,7 @@ pattern: | ) ) ) - (#match? @arg_name "^(token|secret|key|password)$")) @security_hardcoded_tokens + (#match? @arg_name "^(token|secret|key|password)$")) @hardcoded_credential analyzers: - name: entropy