diff --git a/checkers/python/hardcoded_credential.test.py b/checkers/python/hardcoded_credential.test.py new file mode 100644 index 00000000..006ba7d8 --- /dev/null +++ b/checkers/python/hardcoded_credential.test.py @@ -0,0 +1,30 @@ +import os + +def main(): + # These should be flagged: a sensitive keyword argument (token, secret, key, password) bound to a high-entropy string literal + # + connect(host="example.com", token="hqd#18ey283y28wdbbcwbd1ueh1ue2h") + # + set_password(password="A3b$c8d!eF9gHiJkLmNoPqRsTuVwXyZ") + # + configure(key="AKIATESTKEYTESTKEYTESTKEYTEST", secret="TEST/SECRET/KEY/EXAMPLE/1234567890") + + # These should NOT be flagged + # + set_password(password="password123") # Low entropy (below the rule's min of 3.5) + # + configure(username="test_user", value=42) # Not a sensitive argument + + # Should not flag non-string values + # + set_token(token=os.getenv("API_TOKEN")) + + # Should not flag commented out code + # + # connect(host="example.com", token="commented_out_secret") + + # Edge cases + # + empty_string(arg="") # Empty string, and "arg" is not a sensitive argument name + # + numeric_value(key=12345) # Not a string \ No newline at end of file diff --git a/checkers/python/hardcoded_credential.yml b/checkers/python/hardcoded_credential.yml new file mode 100644 index 00000000..9d55a4e3 --- /dev/null +++ b/checkers/python/hardcoded_credential.yml @@ -0,0 +1,32 @@ +language: python +name: hardcoded_credential +message: "Potential hardcoded credential detected" +category: security +severity: warning + +pattern: | + ((call + function: (identifier) @func_name + arguments: (argument_list + (keyword_argument + name: (identifier) @arg_name + value: (string + (string_content) @token_val + ) + ) + ) + ) + (#match? @arg_name "^(token|secret|key|password)$")) @hardcoded_credential + +analyzers: + - name: entropy + capture: "token_val" + config: + min: 3.5 # minimum Shannon entropy (bits per character) the captured string needs for the match to be reported + +exclude: + - "test/**" + - "**/*_test.py" + +description: | + Hardcoded credentials pose a significant security risk. Use secure secret management systems instead of embedding secrets directly in code. 
\ No newline at end of file diff --git a/pkg/analysis/analyze.go b/pkg/analysis/analyze.go index b0716420..283a50fe 100644 --- a/pkg/analysis/analyze.go +++ b/pkg/analysis/analyze.go @@ -6,6 +6,7 @@ import ( "path/filepath" sitter "github.com/smacker/go-tree-sitter" + "globstar.dev/pkg/analysis/analyzers" "globstar.dev/pkg/config" ) @@ -302,3 +303,39 @@ func (ana *Analyzer) runPatternRules() { func (ana *Analyzer) Report(issue *Issue) { ana.issuesRaised = append(ana.issuesRaised, issue) } + +// runAnalyzers reports whether every analyzer in defs accepts the source text +// of the first in-range capture named by its Capture field. It returns false +// if that text is missing or empty, an analyzer rejects it, or an analyzer +// name is not recognized. With no defs it returns true. +func (ana *Analyzer) runAnalyzers(defs []AnalyzerDef, captures []sitter.QueryCapture, matchedQuery *sitter.Query) bool { + for _, def := range defs { + var content string + for _, capture := range captures { + // A nil Source has length 0, so the bounds check below also rejects it. + source := ana.ParseResult.Source + start, end := capture.Node.StartByte(), capture.Node.EndByte() + if start >= uint32(len(source)) || end > uint32(len(source)) { + continue + } + if matchedQuery.CaptureNameForId(capture.Index) == def.Capture { + content = string(source[start:end]) + break + } + } + + if content == "" { + return false + } + + switch def.Name { + case "entropy": + if !analyzers.EntropyAnalyzer(content, def.Config) { + return false + } + default: + return false + } + } + return true +} diff --git a/pkg/analysis/analyzers/entropy.go b/pkg/analysis/analyzers/entropy.go new file mode 100644 index 00000000..faab6ada --- /dev/null +++ b/pkg/analysis/analyzers/entropy.go @@ -0,0 +1,44 @@ +package analyzers + +import ( + "math" +) + +// EntropyAnalyzer reports whether the Shannon entropy of content, in bits per +// character, is at least config["min"]. The threshold defaults to 3.0 when +// "min" is absent or is not a float64, int or int64.
+func EntropyAnalyzer(content string, config map[string]interface{}) bool { + minEntropy := 3.0 + // YAML decoders commonly yield int for a literal such as "min: 4", so accept + // integer thresholds instead of silently falling back to the default. + switch val := config["min"].(type) { + case float64: + minEntropy = val + case int: + minEntropy = float64(val) + case int64: + minEntropy = float64(val) + } + + return calculateShannonEntropy(content) >= minEntropy +} + +// calculateShannonEntropy returns the Shannon entropy of s in bits per rune; +// it is 0 for an empty string. +func calculateShannonEntropy(s string) float64 { + counts := make(map[rune]int) + total := 0 + for _, r := range s { + counts[r]++ + total++ + } + + // Divide by the rune count, not len(s): len counts bytes, which would skew + // the frequencies of any string containing multi-byte characters. + var entropy float64 + for _, cnt := range counts { + f := float64(cnt) / float64(total) + entropy -= f * math.Log2(f) + } + return entropy +} diff --git a/pkg/analysis/pattern_rule.go b/pkg/analysis/pattern_rule.go index 4f9988b1..45886a34 100644 --- a/pkg/analysis/pattern_rule.go +++ b/pkg/analysis/pattern_rule.go @@ -67,6 +67,7 @@ type patternRuleImpl struct { severity config.Severity pathFilter *PathFilter filters []NodeFilter + analyzerDefs []AnalyzerDef } func (r *patternRuleImpl) Language() Language { @@ -83,6 +84,10 @@ func (r *patternRuleImpl) OnMatch( matchedNode *sitter.Node, captures []sitter.QueryCapture, ) { + // Run analyzers first + if !ana.runAnalyzers(r.analyzerDefs, captures, matchedQuery) { + return + } // replace all '@' with the corresponding capture value message := r.issueMessage @@ -151,11 +156,12 @@ type filterYAML struct { } type PatternRuleFile struct { - Language string `yaml:"language"` - Code string `yaml:"name"` - Message string `yaml:"message"` - Category config.Category `yaml:"category"` - Severity config.Severity `yaml:"severity"` + Language string `yaml:"language"` + Code string `yaml:"name"` + Message string `yaml:"message"` + Category config.Category `yaml:"category"` + Severity config.Severity `yaml:"severity"` + Analyzers []AnalyzerDef `yaml:"analyzers,omitempty"` // Pattern is a single pattern in the form of: // pattern: (some_pattern) // in the YAML file @@ -170,6 +176,15 @@ type PatternRuleFile struct { Include []string `yaml:"include,omitempty"` } +// AnalyzerDef is one entry of a rule file's "analyzers" list: the analyzer +// Name to run, the query Capture whose text it inspects, and analyzer-specific +// Config options. +type AnalyzerDef struct { + Name string `yaml:"name"` + Capture string `yaml:"capture"` + Config
map[string]interface{} `yaml:"config,omitempty"` +} + // DecodeLanguage converts a stringified language name to its corresponding // Language enum func DecodeLanguage(language string) Language { @@ -349,6 +364,7 @@ func ReadFromBytes(fileContent []byte) (YmlRule, error) { issueId: rule.Code, pathFilter: pathFilter, filters: filters, + analyzerDefs: rule.Analyzers, } return patternRule, nil