diff --git a/internal/scan/apikeys.go b/internal/scan/apikeys.go index 0724f74..f73dd03 100644 --- a/internal/scan/apikeys.go +++ b/internal/scan/apikeys.go @@ -4,13 +4,317 @@ import ( "fmt" "os" "path/filepath" + "regexp" "sort" + "strings" "github.com/Pringled/agentcheck/internal/config" "github.com/Pringled/agentcheck/internal/fsutil" "github.com/Pringled/agentcheck/internal/models" ) +// HighRiskEnvKeys is the set of environment variable names that indicate +// high-value credentials are present in the shell. Values are NEVER read or logged; +// only the key name is used. +var HighRiskEnvKeys = map[string]bool{ + // AI / ML inference + "OPENAI_API_KEY": true, + "ANTHROPIC_API_KEY": true, + "COHERE_API_KEY": true, + "MISTRAL_API_KEY": true, + "REPLICATE_API_KEY": true, + "HUGGINGFACE_TOKEN": true, + "HF_TOKEN": true, // Hugging Face canonical short name (used by huggingface-hub) + "TOGETHER_API_KEY": true, + "GROQ_API_KEY": true, + "VOYAGE_API_KEY": true, + "ELEVEN_LABS_API_KEY": true, + "PINECONE_API_KEY": true, + "STABILITY_API_KEY": true, // Stability AI — pay-per-image generation + "WANDB_API_KEY": true, // Weights & Biases — model/experiment data + "TAVILY_API_KEY": true, // Tavily search — widely used in LangChain/LangGraph agents + "LANGCHAIN_API_KEY": true, // LangSmith tracing (LangChain ecosystem) + "AZURE_OPENAI_API_KEY": true, // Azure OpenAI — distinct from service principal creds + "FAL_KEY": true, // fal.ai — GPU inference, financial risk + "NVIDIA_API_KEY": true, // NVIDIA NIM — enterprise GPU inference + + // Cloud: env-based credentials + "AWS_ACCESS_KEY_ID": true, + "AWS_SECRET_ACCESS_KEY": true, + "AWS_SESSION_TOKEN": true, + "GOOGLE_APPLICATION_CREDENTIALS": true, + "AZURE_CLIENT_SECRET": true, + + // Secrets managers (key to all other secrets) + "VAULT_TOKEN": true, // HashiCorp Vault — grants access to all managed secrets + "OP_SERVICE_ACCOUNT_TOKEN": true, // 1Password Connect service account + "OP_CONNECT_TOKEN": true, // 1Password 
Connect API token + "INFISICAL_TOKEN": true, // Infisical secrets manager + + // Source control & CI/CD + "GITHUB_TOKEN": true, + "GITLAB_TOKEN": true, + "BITBUCKET_APP_PASSWORD": true, + "NPM_TOKEN": true, + "PYPI_API_TOKEN": true, + "CIRCLE_TOKEN": true, // CircleCI — CI supply chain attack surface + + // Payment + "STRIPE_SECRET_KEY": true, + "BRAINTREE_PRIVATE_KEY": true, + "PAYPAL_CLIENT_SECRET": true, + "SQUARE_ACCESS_TOKEN": true, + "ADYEN_API_KEY": true, // Adyen — enterprise e-commerce payments + "RAZORPAY_KEY_SECRET": true, // Razorpay — dominant in South/SE Asia + "MOLLIE_API_KEY": true, // Mollie — dominant in EU + "PADDLE_API_KEY": true, // Paddle — SaaS billing + + // Messaging & comms (can exfiltrate data or send spam at scale) + "TWILIO_AUTH_TOKEN": true, + "SENDGRID_API_KEY": true, + "MAILGUN_API_KEY": true, + "SLACK_BOT_TOKEN": true, + "DISCORD_BOT_TOKEN": true, + "VONAGE_API_SECRET": true, // Vonage/Nexmo — SMS/voice telephony + "KLAVIYO_API_KEY": true, // Klaviyo — e-commerce email, customer PII + "MAILCHIMP_API_KEY": true, // Mailchimp — customer lists, PII + "CUSTOMERIO_API_KEY": true, // Customer.io — behavioral marketing, PII + "BREVO_API_KEY": true, // Brevo (Sendinblue) — EU email, PII + + // Identity & auth + "OKTA_API_TOKEN": true, + "AUTH0_CLIENT_SECRET": true, + "CLERK_SECRET_KEY": true, // Clerk — popular Next.js auth, auth bypass risk + "WORKOS_API_KEY": true, // WorkOS — enterprise SSO + + // Observability + "DATADOG_API_KEY": true, + "SENTRY_AUTH_TOKEN": true, + "NEW_RELIC_LICENSE_KEY": true, // New Relic — APM data, log exfil + "NEW_RELIC_API_KEY": true, // New Relic user/account API key + "GRAFANA_API_KEY": true, // Grafana Cloud + "GRAFANA_TOKEN": true, // Grafana Cloud access policy token + "HONEYCOMB_API_KEY": true, // Honeycomb — trace data + + // Cloud / hosting / IaC + "VERCEL_TOKEN": true, + "NETLIFY_AUTH_TOKEN": true, + "CLOUDFLARE_API_TOKEN": true, + "HEROKU_API_KEY": true, + "RAILWAY_TOKEN": true, + "FLY_API_TOKEN": 
true, + "DIGITALOCEAN_TOKEN": true, // DigitalOcean — full infra control + "DO_API_TOKEN": true, // DigitalOcean alternative env var name + "LINODE_TOKEN": true, // Linode/Akamai — full infra control + "RENDER_API_KEY": true, // Render — deploy platform access + "PULUMI_ACCESS_TOKEN": true, // Pulumi — IaC state = all infra secrets + "TFE_TOKEN": true, // Terraform Cloud — IaC state + "CLOUDINARY_API_SECRET": true, // Cloudinary — media storage + + // Databases (connection strings often embed credentials) + "DATABASE_URL": true, + "MONGODB_URI": true, + "REDIS_URL": true, + "MONGODB_ATLAS_PRIVATE_KEY": true, // MongoDB Atlas admin API (separate from connection string) + "TURSO_AUTH_TOKEN": true, // Turso — SQLite-at-edge DB access + "UPSTASH_REDIS_REST_TOKEN": true, // Upstash — serverless Redis/Kafka + "ELASTIC_API_KEY": true, // Elasticsearch — data exfil risk + "ELASTIC_CLOUD_API_KEY": true, // Elastic Cloud management API + + // CRM / e-commerce / dev tools + "ATLASSIAN_API_TOKEN": true, // Atlassian/Jira — project data, PII + "JIRA_API_TOKEN": true, // Jira alternative env var + "HUBSPOT_ACCESS_TOKEN": true, // HubSpot CRM — customer PII + sales data + "SALESFORCE_CLIENT_SECRET": true, // Salesforce — enterprise CRM + "SHOPIFY_API_SECRET_KEY": true, // Shopify — store orders, customer PII + "ZENDESK_API_TOKEN": true, // Zendesk — customer support PII + "INTERCOM_ACCESS_TOKEN": true, // Intercom — customer chat PII + "SEGMENT_WRITE_KEY": true, // Segment — all customer behavioral events + "ALGOLIA_API_KEY": true, // Algolia — search index admin access +} + +// credentialSuffixRe matches env var names that contain a credential-related term. +// Provider name patterns require this suffix to avoid false positives on non-credential +// vars like GITHUB_WORKSPACE or OPENAI_BASE_URL. +var credentialSuffixRe = regexp.MustCompile(`(?i)(^|_)(KEY|TOKEN|SECRET|PASSWORD|CRED)(S?)(_|$)`) + +// providerNamePatterns matches env var names containing a known provider keyword. 
// A provider hit on its own is never reported: scanNameRegex emits a finding
// only when the same name also matches credentialSuffixRe.
var providerNamePatterns = func() []*regexp.Regexp {
	// Pattern sources, kept in match order. Short provider names carry a
	// (^|_) anchor so they cannot match in the middle of an unrelated word.
	sources := []string{
		// AI / ML providers
		`(?i)OPENAI`,
		`(?i)ANTHROPIC`,
		`(?i)COHERE`,
		`(?i)MISTRAL`,
		`(?i)REPLICATE`,
		`(?i)HUGGINGFACE`,
		`(?i)HF_`,
		`(?i)TOGETHER`,
		`(?i)GROQ`,
		`(?i)VOYAGE`,
		`(?i)ELEVEN_LABS`,
		`(?i)PINECONE`,
		`(?i)OPENROUTER`,
		`(?i)FIREWORKS`,
		`(?i)DEEPSEEK`,
		`(?i)PERPLEXITY`,
		`(?i)CEREBRAS`,
		`(?i)(^|_)XAI_`, // anchored: must start the name or follow an underscore
		`(?i)ASSEMBLYAI`,
		`(?i)AI21`,
		`(?i)NVIDIA_NIM`,
		`(?i)STABILITY`,   // Stability AI (image generation)
		`(?i)WANDB`,       // Weights & Biases (ML experiment tracking)
		`(?i)TAVILY`,      // Tavily (AI search, common in agents)
		`(?i)LANGCHAIN`,   // LangSmith (LangChain tracing)
		`(?i)(^|_)FAL_`,   // fal.ai; anchored against mid-word hits
		// Secrets managers
		`(?i)DOPPLER`,
		`(?i)VAULT`,     // HashiCorp Vault
		`(?i)INFISICAL`, // Infisical secrets manager
		// Google AI (Gemini, Vertex AI, PaLM)
		`(?i)GEMINI`,
		`(?i)VERTEX`,
		`(?i)(^|_)PALM_`, // anchored against mid-word hits
		// AWS AI
		`(?i)BEDROCK`,
		// Azure AI
		`(?i)AZURE_OPENAI`,
		`(?i)AZURE_COGNITIVE`,
		// Payment providers
		`(?i)STRIPE`,
		`(?i)BRAINTREE`,
		`(?i)PAYPAL`,
		`(?i)SQUARE`,
		`(?i)ADYEN`,    // Adyen (enterprise payments)
		`(?i)RAZORPAY`, // Razorpay (dominant in South/SE Asia)
		`(?i)MOLLIE`,   // Mollie (dominant in EU)
		`(?i)PADDLE`,   // Paddle (SaaS subscription billing)
		// Communication / messaging
		`(?i)TWILIO`,
		`(?i)SENDGRID`,
		`(?i)MAILGUN`,
		`(?i)RESEND`,
		`(?i)POSTMARK`,
		`(?i)SPARKPOST`,
		`(?i)SLACK`,
		`(?i)DISCORD`,
		`(?i)VONAGE`,     // Vonage/Nexmo (SMS/voice)
		`(?i)KLAVIYO`,    // Klaviyo (e-commerce email)
		`(?i)MAILCHIMP`,  // Mailchimp (customer lists)
		`(?i)CUSTOMERIO`, // Customer.io (behavioral marketing)
		`(?i)BREVO`,      // Brevo/Sendinblue (EU email)
		// Auth / identity
		`(?i)OKTA`,
		`(?i)AUTH0`,
		`(?i)CLERK`,  // Clerk (popular Next.js auth)
		`(?i)WORKOS`, // WorkOS (enterprise SSO)
		// Observability
		`(?i)DATADOG`,
		`(?i)SENTRY`,
		`(?i)NEW_RELIC`, // New Relic (APM/log exfil)
		`(?i)GRAFANA`,   // Grafana Cloud
		`(?i)HONEYCOMB`, // Honeycomb (observability)
		// Cloud / hosting platforms
		`(?i)VERCEL`,
		`(?i)NETLIFY`,
		`(?i)CLOUDFLARE`,
		`(?i)HEROKU`,
		`(?i)RAILWAY`,
		`(?i)(^|_)FLY_`,    // anchored against mid-word hits
		`(?i)DIGITALOCEAN`, // DigitalOcean (full infra control)
		`(?i)LINODE`,       // Linode/Akamai (full infra control)
		`(?i)RENDER`,       // Render (deploy platform)
		`(?i)PULUMI`,       // Pulumi (IaC state)
		`(?i)CLOUDINARY`,   // Cloudinary (media storage)
		// Source control
		`(?i)GITHUB`,
		`(?i)GITLAB`,
		`(?i)BITBUCKET`,
		// Productivity / project tools
		`(?i)(^|_)LINEAR_`, // anchored against mid-word hits
		`(?i)NOTION`,
		`(?i)AIRTABLE`,
		`(?i)ATLASSIAN`,  // Atlassian/Jira (project data, PII)
		`(?i)JIRA`,       // Jira
		`(?i)ZENDESK`,    // Zendesk (customer support PII)
		`(?i)INTERCOM`,   // Intercom (customer chat PII)
		`(?i)HUBSPOT`,    // HubSpot CRM (customer PII)
		`(?i)SALESFORCE`, // Salesforce (enterprise CRM)
		`(?i)SHOPIFY`,    // Shopify (store orders, customer data)
		`(?i)SEGMENT`,    // Segment (all customer behavioral events)
		`(?i)ALGOLIA`,    // Algolia (search index admin)
		// Database-as-a-service
		`(?i)SUPABASE`,
		`(?i)(^|_)NEON_`, // anchored against mid-word hits
		`(?i)PLANETSCALE`,
		`(?i)TURSO`,   // Turso (SQLite-at-edge)
		`(?i)UPSTASH`, // Upstash (serverless Redis/Kafka)
		`(?i)ELASTIC`, // Elasticsearch/Elastic Cloud
	}

	// Compile once at package initialisation; MustCompile panics on an
	// invalid pattern, exactly as the one-call-per-entry form would.
	compiled := make([]*regexp.Regexp, 0, len(sources))
	for _, src := range sources {
		compiled = append(compiled, regexp.MustCompile(src))
	}
	return compiled
}()

// credentialSuffixPatterns matches generic credential terms in env var names.
// These match standalone without requiring a provider keyword.
+var credentialSuffixPatterns = []*regexp.Regexp{ + regexp.MustCompile(`(?i)API_KEY`), + regexp.MustCompile(`(?i)API_TOKEN`), + regexp.MustCompile(`(?i)SECRET_KEY`), + regexp.MustCompile(`(?i)AUTH_TOKEN`), + regexp.MustCompile(`(?i)ACCESS_TOKEN`), + regexp.MustCompile(`(?i)PRIVATE_KEY`), + regexp.MustCompile(`(?i)SERVICE_KEY`), +} + +// valuePattern describes a provider key format recognisable by prefix and exact total length. +type valuePattern struct { + prefix string + totalLen int + severity models.Severity // HIGH for provider-specific prefixes; UNCERTAIN for ambiguous ones + providerTag string // used to build description, e.g. "OpenAI project" +} + +// valuePatterns lists known API key formats identified by a distinctive prefix and exact total length. +var valuePatterns = []valuePattern{ + // OpenAI - more-specific prefixes listed first so they match before the generic sk- entry. + {prefix: "sk-proj-", totalLen: 56, severity: models.SeverityHigh, providerTag: "OpenAI project"}, + {prefix: "sk-admin-", totalLen: 57, severity: models.SeverityHigh, providerTag: "OpenAI admin"}, + // sk- is shared by many tools (OpenAI legacy, LangChain proxies, self-hosted LLMs, etc.). + // Flag as UNCERTAIN so the user can confirm the actual provider via the variable name. + {prefix: "sk-", totalLen: 51, severity: models.SeverityUncertain, providerTag: "possible OpenAI legacy or other sk- key"}, + // Anthropic - prefix is distinctive enough for HIGH confidence. + {prefix: "sk-ant-", totalLen: 108, severity: models.SeverityHigh, providerTag: "Anthropic"}, + // Stripe - underscore separator makes these provider-specific. 
+ {prefix: "sk_live_", totalLen: 55, severity: models.SeverityHigh, providerTag: "Stripe live secret"}, + {prefix: "sk_test_", totalLen: 55, severity: models.SeverityHigh, providerTag: "Stripe test secret"}, + {prefix: "rk_live_", totalLen: 55, severity: models.SeverityHigh, providerTag: "Stripe live restricted"}, + {prefix: "rk_test_", totalLen: 55, severity: models.SeverityHigh, providerTag: "Stripe test restricted"}, + // GitLab — glpat- + 20 random chars = 26 total. + {prefix: "glpat-", totalLen: 26, severity: models.SeverityHigh, providerTag: "GitLab personal access token"}, + // npm granular access token — npm_ + 36 hex chars = 40 total. + {prefix: "npm_", totalLen: 40, severity: models.SeverityHigh, providerTag: "npm access token"}, + // Groq — gsk_ prefix confirmed in Groq docs. + {prefix: "gsk_", totalLen: 56, severity: models.SeverityHigh, providerTag: "Groq"}, + // Twilio API key SID - SK + 32 hex chars = 34 total. + // SeverityUncertain: SK prefix is broad, false positives are likely. + {prefix: "SK", totalLen: 34, severity: models.SeverityUncertain, providerTag: "Twilio API key SID"}, + // SendGrid — SG. + 22 + . + 43 = 69 total (with the dots). + {prefix: "SG.", totalLen: 69, severity: models.SeverityHigh, providerTag: "SendGrid"}, + // HuggingFace + {prefix: "hf_", totalLen: 37, severity: models.SeverityHigh, providerTag: "HuggingFace"}, + // GitHub tokens — all provider-specific prefixes. 
+ {prefix: "ghp_", totalLen: 40, severity: models.SeverityHigh, providerTag: "GitHub classic PAT"}, + {prefix: "github_pat_", totalLen: 93, severity: models.SeverityHigh, providerTag: "GitHub fine-grained PAT"}, + {prefix: "gho_", totalLen: 40, severity: models.SeverityHigh, providerTag: "GitHub OAuth token"}, + {prefix: "ghu_", totalLen: 40, severity: models.SeverityHigh, providerTag: "GitHub user token"}, + {prefix: "ghs_", totalLen: 40, severity: models.SeverityHigh, providerTag: "GitHub app installation token"}, + {prefix: "ghr_", totalLen: 40, severity: models.SeverityHigh, providerTag: "GitHub refresh token"}, + // Tavily — tvly- prefix + 40 chars = 45 total. + {prefix: "tvly-", totalLen: 45, severity: models.SeverityHigh, providerTag: "Tavily search"}, + // LangSmith — lsv2_pt_ (personal access) or lsv2_sk_ (service key) prefix + 40 chars. + {prefix: "lsv2_pt_", totalLen: 48, severity: models.SeverityHigh, providerTag: "LangSmith API key"}, + {prefix: "lsv2_sk_", totalLen: 48, severity: models.SeverityHigh, providerTag: "LangSmith service key"}, +} + // credentialFiles is the list of credential files/dirs to check. var credentialFiles = []config.CredentialFile{ {Path: "~/.config/gcloud/", Label: "GCP application default credentials"}, @@ -25,7 +329,7 @@ var credentialFiles = []config.CredentialFile{ } // APIKeyScanner scans for high-risk API keys in environment variables and credential config files. -// It reports key names and file paths only; never values or file contents. +// Key names and file paths only are reported in findings; values and file contents are never emitted. // It never returns skipped=true. type APIKeyScanner struct { Base @@ -54,14 +358,21 @@ func NewAPIKeyScannerWithConfig(cfg config.Config) *APIKeyScanner { } } -// Name returns the canonical scanner ID. func (s *APIKeyScanner) Name() string { return "api_keys" } -// Scan detects high-risk API keys in env vars and credential file presence. -// Implements Scanner. 
Never returns skipped=true. func (s *APIKeyScanner) Scan() models.ScanResult { var findings []models.Finding - findings = append(findings, s.scanEnvKeys()...) + // seenEnvNames is shared across all three env-scanning passes so that any variable + // claimed by an earlier pass is not re-reported by a later one. Order: + // 1. scanEnvKeys — exact-match built-in + user-configured extra keys + // 2. scanNameRegex — name-pattern heuristics (MY_OPENAI_KEY etc.) + // 3. scanValuePatterns — prefix+length value matching + // A variable in ExtraEnvKeys that also matches a nameRegex pattern therefore produces + // exactly one finding (from scanEnvKeys, the highest-priority pass). + seenEnvNames := make(map[string]bool) + findings = append(findings, s.scanEnvKeys(seenEnvNames)...) + findings = append(findings, s.scanNameRegex(seenEnvNames)...) + findings = append(findings, s.scanValuePatterns(seenEnvNames)...) findings = append(findings, s.scanCredentialFiles()...) return models.ScanResult{ ScannerName: "api_keys", @@ -71,7 +382,7 @@ func (s *APIKeyScanner) Scan() models.ScanResult { // scanEnvKeys checks built-in and extra environment variable key names for presence. // Key names only are reported; values are never read or stored. -func (s *APIKeyScanner) scanEnvKeys() []models.Finding { +func (s *APIKeyScanner) scanEnvKeys(seenEnvNames map[string]bool) []models.Finding { var findings []models.Finding // KEYS-01: Built-in high-risk env vars (sorted for deterministic output). 
@@ -81,28 +392,34 @@ func (s *APIKeyScanner) scanEnvKeys() []models.Finding { } sort.Strings(keys) for _, key := range keys { - if val := os.Getenv(key); val != "" { - _ = val // value is intentionally discarded; presence only - findings = append(findings, envKeyFinding(key)) + if os.Getenv(key) != "" { + seenEnvNames[key] = true + findings = append(findings, models.Finding{ + Scanner: "api_keys", + Resource: key, + Severity: models.SeverityHigh, + Description: "Can be used to make authenticated API calls.", + }) } } // Extra env keys from user config (sorted for deterministic output). - // Skip any that are already in the built-in set or seen earlier in the extras - // list to avoid duplicate findings. if len(s.ExtraEnvKeys) > 0 { - seenExtra := make(map[string]bool, len(s.ExtraEnvKeys)) extraKeys := make([]string, len(s.ExtraEnvKeys)) copy(extraKeys, s.ExtraEnvKeys) sort.Strings(extraKeys) for _, key := range extraKeys { - if HighRiskEnvKeys[key] || seenExtra[key] { - continue // already covered by built-in check or earlier extra + if HighRiskEnvKeys[key] || seenEnvNames[key] { + continue } - seenExtra[key] = true - if val := os.Getenv(key); val != "" { - _ = val // value is intentionally discarded; presence only - findings = append(findings, envKeyFinding(key)) + if os.Getenv(key) != "" { + seenEnvNames[key] = true + findings = append(findings, models.Finding{ + Scanner: "api_keys", + Resource: key, + Severity: models.SeverityHigh, + Description: "Can be used to make authenticated API calls.", + }) } } } @@ -110,42 +427,120 @@ func (s *APIKeyScanner) scanEnvKeys() []models.Finding { return findings } -// scanCredentialFiles checks built-in and extra credential file paths for existence. -// File paths only are reported; file contents are never read or stored. -func (s *APIKeyScanner) scanCredentialFiles() []models.Finding { - var findings []models.Finding +// envEntry holds a parsed, non-empty environment variable that has not yet been claimed. 
+type envEntry struct { + name string + value string +} - // KEYS-02: Built-in credential files. - // If home directory cannot be resolved, skip all ~-based paths to avoid - // scanning incorrect root-relative paths (e.g. /.aws/credentials). - homeDir := s.resolveHomeDir() - for _, cf := range credentialFiles { - if homeDir == "" && len(cf.Path) > 0 && cf.Path[0] == '~' { +// unclaimedEnvEntries returns non-empty env vars that are not in HighRiskEnvKeys and not +// already present in seenEnvNames. It is used by scanNameRegex and scanValuePatterns to +// avoid repeating the same iteration and filtering logic in both methods. +func unclaimedEnvEntries(seenEnvNames map[string]bool) []envEntry { + var entries []envEntry + for _, raw := range os.Environ() { + idx := strings.IndexByte(raw, '=') + if idx < 0 { continue } - expanded := expandHome(cf.Path, homeDir) - if fsutil.Exists(expanded) { + name := raw[:idx] + value := raw[idx+1:] + if HighRiskEnvKeys[name] || value == "" || seenEnvNames[name] { + continue + } + entries = append(entries, envEntry{name: name, value: value}) + } + return entries +} + +// scanNameRegex checks env var names against known provider keywords and generic +// credential terms. It catches non-standard names like MY_OPENAI_KEY that are +// missed by the exact-match HighRiskEnvKeys pass. Key names only are reported; +// values are checked only for emptiness and then discarded. +// seenEnvNames is the shared cross-pass dedup set; matched names are added to it. +func (s *APIKeyScanner) scanNameRegex(seenEnvNames map[string]bool) []models.Finding { + var findings []models.Finding + + for _, e := range unclaimedEnvEntries(seenEnvNames) { + matched := false + // Provider patterns require the name to also contain a credential suffix. + for _, re := range providerNamePatterns { + if re.MatchString(e.name) && credentialSuffixRe.MatchString(e.name) { + matched = true + break + } + } + // Credential suffix patterns match standalone. 
+ if !matched { + for _, re := range credentialSuffixPatterns { + if re.MatchString(e.name) { + matched = true + break + } + } + } + if matched { + seenEnvNames[e.name] = true findings = append(findings, models.Finding{ Scanner: "api_keys", - Resource: expanded, // path only, never file contents - Severity: models.SeverityModerate, - Description: fmt.Sprintf("Credential file readable at %s.", expanded), + Resource: e.name, + Severity: models.SeverityHigh, + Description: "Can be used to make authenticated API calls.", }) } } - // Extra credential files from user config. - // Deduplicate by expanded path to avoid reporting the same file twice. - seenExtraPath := make(map[string]bool, len(s.ExtraCredentialFiles)) - for _, cf := range s.ExtraCredentialFiles { + return findings +} + +// scanValuePatterns reads env var values to match against known provider prefixes. +// Values are used only for prefix+length matching and then discarded. +func (s *APIKeyScanner) scanValuePatterns(seenEnvNames map[string]bool) []models.Finding { + var findings []models.Finding + + for _, e := range unclaimedEnvEntries(seenEnvNames) { + for _, p := range valuePatterns { + if strings.HasPrefix(e.value, p.prefix) && len(e.value) == p.totalLen { + seenEnvNames[e.name] = true + findings = append(findings, models.Finding{ + Scanner: "api_keys", + Resource: e.name, // env var NAME, never the value + Severity: p.severity, + Description: fmt.Sprintf("Value matches %s API key format.", p.providerTag), + }) + break // one finding per variable name + } + } + } + + return findings +} + +// scanCredentialFiles checks built-in and extra credential file paths for existence. +// File paths only are reported; file contents are never read or stored. +func (s *APIKeyScanner) scanCredentialFiles() []models.Finding { + var findings []models.Finding + + // If home directory cannot be resolved, skip all ~-based paths to avoid + // scanning incorrect root-relative paths (e.g. /.aws/credentials). 
+ homeDir := s.HomeDir + if homeDir == "" { + if h, err := os.UserHomeDir(); err == nil { + homeDir = h + } + } + // seenPath deduplicates built-in and extra paths. + allCredFiles := append(credentialFiles, s.ExtraCredentialFiles...) + seenPath := make(map[string]bool, len(allCredFiles)) + for _, cf := range allCredFiles { if homeDir == "" && len(cf.Path) > 0 && cf.Path[0] == '~' { continue } - expanded := expandHome(cf.Path, homeDir) - if seenExtraPath[expanded] { - continue // duplicate path in extras list + expanded := filepath.Clean(expandHome(cf.Path, homeDir)) + if seenPath[expanded] { + continue } - seenExtraPath[expanded] = true + seenPath[expanded] = true if fsutil.Exists(expanded) { findings = append(findings, models.Finding{ Scanner: "api_keys", @@ -159,28 +554,6 @@ func (s *APIKeyScanner) scanCredentialFiles() []models.Finding { return findings } -// envKeyFinding builds a HIGH severity finding for a detected environment variable key. -func envKeyFinding(key string) models.Finding { - return models.Finding{ - Scanner: "api_keys", - Resource: key, // key name only, never the value - Severity: models.SeverityHigh, - Description: "Can be used to make authenticated API calls.", - } -} - -// resolveHomeDir returns the effective home directory for credential file expansion. -func (s *APIKeyScanner) resolveHomeDir() string { - if s.HomeDir != "" { - return s.HomeDir - } - home, err := os.UserHomeDir() - if err != nil { - return "" - } - return home -} - // expandHome replaces a leading ~ with the given homeDir. 
func expandHome(path, homeDir string) string { if len(path) == 0 { diff --git a/internal/scan/apikeys_test.go b/internal/scan/apikeys_test.go index 90cc396..4901d40 100644 --- a/internal/scan/apikeys_test.go +++ b/internal/scan/apikeys_test.go @@ -3,6 +3,7 @@ package scan_test import ( "os" "path/filepath" + "strings" "testing" "github.com/Pringled/agentcheck/internal/config" @@ -17,7 +18,16 @@ func clearHighRiskEnv(t *testing.T) { } } -// newScannerWithHome creates an APIKeyScanner with HomeDir set to home and no extras. +// clearAllEnv sets every environment variable to empty for the duration of the test. +func clearAllEnv(t *testing.T) { + t.Helper() + for _, entry := range os.Environ() { + if idx := strings.IndexByte(entry, '='); idx >= 0 { + t.Setenv(entry[:idx], "") + } + } +} + func newScannerWithHome(home string) *scan.APIKeyScanner { s := scan.NewAPIKeyScanner() s.HomeDir = home @@ -34,6 +44,18 @@ func TestAPIKeyScanner_HighRiskEnvKeysContainsKnownKeys(t *testing.T) { "GITHUB_TOKEN", "STRIPE_SECRET_KEY", "DATABASE_URL", + // New entries + "STABILITY_API_KEY", + "WANDB_API_KEY", + "VAULT_TOKEN", + "INFISICAL_TOKEN", + "CLERK_SECRET_KEY", + "DIGITALOCEAN_TOKEN", + "PULUMI_ACCESS_TOKEN", + "SHOPIFY_API_SECRET_KEY", + "HUBSPOT_ACCESS_TOKEN", + "TURSO_AUTH_TOKEN", + "UPSTASH_REDIS_REST_TOKEN", } for _, key := range known { if !scan.HighRiskEnvKeys[key] { @@ -65,7 +87,7 @@ func TestAPIKeyScanner_NeverStoresSecretValue(t *testing.T) { } func TestAPIKeyScanner_EmptyEnvNoFindings(t *testing.T) { - clearHighRiskEnv(t) + clearAllEnv(t) s := newScannerWithHome(t.TempDir()) result := s.Scan() @@ -129,20 +151,6 @@ func TestAPIKeyScanner_CredentialFileContentNotInFindings(t *testing.T) { assertResource(t, result.Findings, credFile) } -func TestAPIKeyScanner_NoCredentialFileNoFinding(t *testing.T) { - clearHighRiskEnv(t) - - s := newScannerWithHome(t.TempDir()) - result := s.Scan() - - if result.Skipped { - t.Error("APIKeyScanner must never return skipped=true") - } - if 
len(result.Findings) != 0 { - t.Errorf("expected 0 findings in empty home dir, got %d: %v", len(result.Findings), resourceSet(result.Findings)) - } -} - func TestAPIKeyScanner_GCPCredentialsDirDetected(t *testing.T) { home := t.TempDir() gcloudDir := filepath.Join(home, ".config", "gcloud") @@ -245,6 +253,25 @@ func TestAPIKeyScanner_NoDuplicateFindings(t *testing.T) { return tokenFile }, }, + { + name: "extra credential file duplicates built-in path", + makeScanner: func(home string) *scan.APIKeyScanner { + return &scan.APIKeyScanner{ + HomeDir: home, + ExtraCredentialFiles: []config.CredentialFile{ + {Path: "~/.netrc", Label: "netrc (duplicate of built-in)"}, + }, + } + }, + setup: func(t *testing.T, home string) string { + netrcFile := filepath.Join(home, ".netrc") + if err := os.WriteFile(netrcFile, []byte("machine example.com"), 0o600); err != nil { + t.Fatalf("create .netrc: %v", err) + } + clearHighRiskEnv(t) + return netrcFile + }, + }, } for _, tc := range cases { @@ -308,3 +335,369 @@ func TestAPIKeyScanner_ExtraCredentialFiles_TildeExpanded(t *testing.T) { assertResource(t, result.Findings, tokenFile) } + +func TestAPIKeyScanner_NameRegex_ProviderKeyword(t *testing.T) { + t.Setenv("MY_OPENAI_KEY", "sk-something") + clearHighRiskEnv(t) + + s := newScannerWithHome(t.TempDir()) + result := s.Scan() + + assertResource(t, result.Findings, "MY_OPENAI_KEY") +} + +func TestAPIKeyScanner_NameRegex_GenericTerm(t *testing.T) { + t.Setenv("INTERNAL_API_KEY", "secret") + clearHighRiskEnv(t) + + s := newScannerWithHome(t.TempDir()) + result := s.Scan() + + assertResource(t, result.Findings, "INTERNAL_API_KEY") +} + +func TestAPIKeyScanner_NameRegex_NoDuplicateWithBuiltin(t *testing.T) { + t.Setenv("OPENAI_API_KEY", "sk-test") + // Clear all built-in keys except OPENAI_API_KEY. 
+ for k := range scan.HighRiskEnvKeys { + if k != "OPENAI_API_KEY" { + t.Setenv(k, "") + } + } + + s := newScannerWithHome(t.TempDir()) + result := s.Scan() + + count := 0 + for _, f := range result.Findings { + if f.Resource == "OPENAI_API_KEY" { + count++ + } + } + if count != 1 { + t.Errorf("expected exactly 1 finding for OPENAI_API_KEY, got %d", count) + } +} + +func TestAPIKeyScanner_ValuePatterns(t *testing.T) { + cases := []struct { + name string + envVar string + value string + wantSeverity string + wantDescSub string // substring expected in description + }{ + {"ambiguous sk-", "SOME_CRED", "sk-" + strings.Repeat("x", 48), "UNCERTAIN", "possible OpenAI legacy"}, + {"Stripe live secret", "PAYMENT_KEY", "sk_live_" + strings.Repeat("s", 47), "HIGH", "Stripe"}, + {"Stripe test secret", "TEST_PAYMENT_KEY", "sk_test_" + strings.Repeat("t", 47), "HIGH", "Stripe"}, + {"GitLab PAT", "REPO_TOKEN", "glpat-" + strings.Repeat("g", 20), "HIGH", "GitLab"}, + {"npm token", "REGISTRY_KEY", "npm_" + strings.Repeat("n", 36), "HIGH", "npm"}, + {"Groq", "INFERENCE_KEY", "gsk_" + strings.Repeat("q", 52), "HIGH", "Groq"}, + {"SendGrid", "MAIL_KEY", "SG." + strings.Repeat("a", 22) + "." 
+ strings.Repeat("b", 43), "HIGH", "SendGrid"}, + {"Anthropic", "LLM_KEY", "sk-ant-" + strings.Repeat("a", 101), "HIGH", "Anthropic"}, + {"OpenAI project", "SOME_AI_CRED", "sk-proj-" + strings.Repeat("a", 48), "HIGH", "OpenAI project"}, + {"HuggingFace", "ML_MODEL_CRED", "hf_" + strings.Repeat("b", 34), "HIGH", "HuggingFace"}, + {"GitHub classic PAT", "WORK_GH_TOKEN", "ghp_" + strings.Repeat("c", 36), "HIGH", "GitHub"}, + {"Twilio SID", "CRED_SID", "SK" + strings.Repeat("f", 32), "UNCERTAIN", "Twilio"}, + {"Tavily", "SEARCH_KEY", "tvly-" + strings.Repeat("t", 40), "HIGH", "Tavily"}, + {"LangSmith personal token", "TRACE_KEY", "lsv2_pt_" + strings.Repeat("l", 40), "HIGH", "LangSmith"}, + {"LangSmith service key", "TRACE_SVC_KEY", "lsv2_sk_" + strings.Repeat("l", 40), "HIGH", "LangSmith"}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Setenv(tc.envVar, tc.value) + clearHighRiskEnv(t) + + s := newScannerWithHome(t.TempDir()) + result := s.Scan() + + assertResource(t, result.Findings, tc.envVar) + for _, f := range result.Findings { + if f.Resource == tc.envVar { + if string(f.Severity) != tc.wantSeverity { + t.Errorf("severity: got %q, want %q", f.Severity, tc.wantSeverity) + } + if !strings.Contains(f.Description, tc.wantDescSub) { + t.Errorf("description %q missing %q", f.Description, tc.wantDescSub) + } + } + } + assertNoSecretValue(t, result.Findings, tc.value) + }) + } +} + +func TestAPIKeyScanner_NameRegex_AnchoredPatterns(t *testing.T) { + cases := []struct { + name string + shouldMatch map[string]string + shouldNotMatch map[string]string + }{ + { + name: "FLY_", + shouldMatch: map[string]string{"FLY_API_TOKEN": "real-token", "MY_FLY_TOKEN": "also-real"}, + shouldNotMatch: map[string]string{"BUTTERFLY_KEY": "v", "FLYWEIGHT_INDEX": "v"}, + }, + { + name: "NEON_", + shouldMatch: map[string]string{"NEON_API_KEY": "real-neon-key", "MY_NEON_KEY": "also-real"}, + shouldNotMatch: map[string]string{"ANEMONE_CONFIG": "v", 
"NEONLIGHTS_COLOR": "v"}, + }, + { + name: "LINEAR_", + shouldMatch: map[string]string{"LINEAR_API_KEY": "real-linear-key", "MY_LINEAR_TOKEN": "also-real"}, + shouldNotMatch: map[string]string{"BILINEAR_FILTER": "v"}, + }, + { + name: "PALM_", + shouldMatch: map[string]string{"PALM_API_KEY": "real-palm-key", "MY_PALM_KEY": "also-real"}, + shouldNotMatch: map[string]string{"NAPALM_MODE": "v"}, + }, + { + name: "XAI_", + shouldMatch: map[string]string{"XAI_API_KEY": "real-xai-key", "MY_XAI_KEY": "also-real"}, + shouldNotMatch: map[string]string{"PROXAI_ENDPOINT": "https://api.proxai.com", "RELAXAI_MODE": "true"}, + }, + { + name: "FAL_", + shouldMatch: map[string]string{"FAL_API_KEY": "real-fal-key", "MY_FAL_KEY": "also-real"}, + shouldNotMatch: map[string]string{"DEFAULT_CONFIG": "v", "HALFLIFE_COUNT": "v"}, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + clearAllEnv(t) + for k, v := range tc.shouldMatch { + t.Setenv(k, v) + } + for k, v := range tc.shouldNotMatch { + t.Setenv(k, v) + } + + s := newScannerWithHome(t.TempDir()) + result := s.Scan() + + for k := range tc.shouldMatch { + assertResource(t, result.Findings, k) + } + for k := range tc.shouldNotMatch { + if contains(result.Findings, k) { + t.Errorf("%s should not be flagged by %s pattern", k, tc.name) + } + } + }) + } +} + +func TestAPIKeyScanner_NameRegex_NewProviders(t *testing.T) { + clearHighRiskEnv(t) + cases := []struct { + envVar string + value string + }{ + // Google / cloud AI + {"MY_GEMINI_KEY", "gemini-key-value"}, + {"VERTEX_API_KEY", "vertex-key-value"}, + {"BEDROCK_ACCESS_KEY", "bedrock-key-value"}, + {"AZURE_OPENAI_KEY", "azure-openai-key"}, + // Communication + {"RESEND_API_KEY", "resend-key-value"}, + {"POSTMARK_TOKEN", "postmark-key-value"}, + // Productivity / project tools + {"MY_LINEAR_TOKEN", "linear-key-value"}, + {"NOTION_API_KEY", "notion-key-value"}, + {"AIRTABLE_KEY", "airtable-key-value"}, + // Database-as-a-service + {"SUPABASE_KEY", 
"supabase-key-value"}, + {"NEON_API_KEY", "neon-key-value"}, + {"PLANETSCALE_TOKEN", "ps-key-value"}, + // Newer AI providers + {"OPENROUTER_API_KEY", "or-key-value"}, + {"FIREWORKS_API_KEY", "fw-key-value"}, + {"DEEPSEEK_API_KEY", "ds-key-value"}, + {"PERPLEXITY_API_KEY", "pplx-key-value"}, + {"CEREBRAS_API_KEY", "cb-key-value"}, + {"DOPPLER_TOKEN", "dp-token-value"}, + {"XAI_API_KEY", "xai-key-value"}, + {"ASSEMBLYAI_API_KEY", "aai-key-value"}, + {"AI21_API_KEY", "ai21-key-value"}, + {"NVIDIA_NIM_API_KEY", "nim-key-value"}, + // New AI/ML providers + {"STABILITY_API_KEY", "stability-key-value"}, + {"WANDB_PROJECT_KEY", "wandb-key-value"}, + {"TAVILY_API_KEY", "tavily-key-value"}, + {"LANGCHAIN_API_KEY", "langchain-key-value"}, + {"AZURE_OPENAI_API_KEY", "azure-oai-key"}, + {"FAL_API_KEY", "fal-key-value"}, + // Secrets managers + {"VAULT_API_TOKEN", "vault-key-value"}, + {"INFISICAL_API_TOKEN", "infisical-key-value"}, + // New payment providers + {"ADYEN_API_KEY", "adyen-key-value"}, + {"RAZORPAY_SECRET_KEY", "razorpay-key-value"}, + {"MOLLIE_API_KEY", "mollie-key-value"}, + {"PADDLE_API_KEY", "paddle-key-value"}, + // New communication providers + {"VONAGE_API_SECRET", "vonage-key-value"}, + {"KLAVIYO_API_KEY", "klaviyo-key-value"}, + {"MAILCHIMP_API_KEY", "mailchimp-key-value"}, + {"CUSTOMERIO_API_KEY", "customerio-key-value"}, + {"BREVO_API_KEY", "brevo-key-value"}, + // New auth providers + {"CLERK_SECRET_KEY", "clerk-key-value"}, + {"WORKOS_API_KEY", "workos-key-value"}, + // New observability providers + {"NEW_RELIC_LICENSE_KEY", "newrelic-key-value"}, + {"GRAFANA_API_KEY", "grafana-key-value"}, + {"HONEYCOMB_API_KEY", "honeycomb-key-value"}, + // New cloud / IaC providers + {"DIGITALOCEAN_API_KEY", "do-key-value"}, + {"LINODE_API_TOKEN", "linode-key-value"}, + {"RENDER_API_KEY", "render-key-value"}, + {"PULUMI_ACCESS_TOKEN", "pulumi-key-value"}, + {"CLOUDINARY_API_SECRET", "cloudinary-key-value"}, + // New CI/CD + {"CIRCLE_TOKEN", "circle-key-value"}, + // 
New dev tools / CRM + {"ATLASSIAN_API_TOKEN", "atlassian-key-value"}, + {"JIRA_API_TOKEN", "jira-key-value"}, + {"ZENDESK_API_TOKEN", "zendesk-key-value"}, + {"INTERCOM_ACCESS_TOKEN", "intercom-key-value"}, + {"HUBSPOT_API_KEY", "hubspot-key-value"}, + {"SALESFORCE_CLIENT_SECRET", "sf-key-value"}, + {"SHOPIFY_API_SECRET_KEY", "shopify-key-value"}, + {"SEGMENT_WRITE_KEY", "segment-key-value"}, + {"ALGOLIA_API_KEY", "algolia-key-value"}, + // New database providers + {"TURSO_AUTH_TOKEN", "turso-key-value"}, + {"UPSTASH_REDIS_REST_TOKEN", "upstash-key-value"}, + {"ELASTIC_API_KEY", "elastic-key-value"}, + } + + for _, tc := range cases { + t.Setenv(tc.envVar, tc.value) + } + + s := newScannerWithHome(t.TempDir()) + result := s.Scan() + + for _, tc := range cases { + assertResource(t, result.Findings, tc.envVar) + } +} + +func TestAPIKeyScanner_ValuePattern_NoMatchWrongLength(t *testing.T) { + value := "sk-proj-" + strings.Repeat("x", 10) // total 18 chars, wrong length for any pattern + t.Setenv("SOME_KEY", value) + clearHighRiskEnv(t) + + s := newScannerWithHome(t.TempDir()) + result := s.Scan() + + for _, f := range result.Findings { + if f.Resource == "SOME_KEY" { + t.Errorf("got unexpected finding for SOME_KEY with wrong-length value") + } + } +} + +func TestAPIKeyScanner_CrossPassDedup_NameRegexWins(t *testing.T) { + // CUSTOM_STRIPE_KEY matches the STRIPE name-regex. + // sk_live_ + 47 chars matches the Stripe live secret value pattern. + // Without cross-pass dedup both passes would emit a finding. 
+ value := "sk_live_" + strings.Repeat("s", 47) // total 55 chars + t.Setenv("CUSTOM_STRIPE_KEY", value) + clearHighRiskEnv(t) + + s := newScannerWithHome(t.TempDir()) + result := s.Scan() + + count := 0 + for _, f := range result.Findings { + if f.Resource == "CUSTOM_STRIPE_KEY" { + count++ + } + } + if count != 1 { + t.Errorf("expected exactly 1 finding for CUSTOM_STRIPE_KEY (cross-pass dedup), got %d", count) + } + // The finding must be the name-regex one (no "Value matches" in description). + for _, f := range result.Findings { + if f.Resource == "CUSTOM_STRIPE_KEY" { + if strings.Contains(f.Description, "Value matches") { + t.Errorf("expected name-regex finding (not value-pattern), got description: %q", f.Description) + } + } + } +} + +func TestAPIKeyScanner_ExtraEnvKeys_NoDuplicateWithNameRegex(t *testing.T) { + const key = "MY_OPENAI_KEY" // matches OPENAI nameRegexPattern AND is in ExtraEnvKeys + t.Setenv(key, "sk-test-value") + clearHighRiskEnv(t) + + s := &scan.APIKeyScanner{ + HomeDir: t.TempDir(), + ExtraEnvKeys: []string{key}, + } + result := s.Scan() + + count := 0 + for _, f := range result.Findings { + if f.Resource == key { + count++ + } + } + if count != 1 { + t.Errorf("expected exactly 1 finding for %q (ExtraEnvKeys + nameRegex cross-pass dedup), got %d", key, count) + } +} + +func TestAPIKeyScanner_ValuePattern_BuiltinSkipped(t *testing.T) { + value := "sk-proj-" + strings.Repeat("z", 48) // total 56 chars - matches OpenAI project pattern + t.Setenv("OPENAI_API_KEY", value) + // Clear all other built-in keys. 
+ for k := range scan.HighRiskEnvKeys { + if k != "OPENAI_API_KEY" { + t.Setenv(k, "") + } + } + + s := newScannerWithHome(t.TempDir()) + result := s.Scan() + + count := 0 + for _, f := range result.Findings { + if f.Resource == "OPENAI_API_KEY" { + count++ + } + } + if count != 1 { + t.Errorf("expected exactly 1 finding for OPENAI_API_KEY, got %d", count) + } +} + +func TestAPIKeyScanner_NameRegex_ProviderWithoutSuffix_NotFlagged(t *testing.T) { + clearAllEnv(t) + // Provider keyword present but no credential suffix - should NOT be flagged. + t.Setenv("GITHUB_WORKSPACE", "/home/runner/work") + t.Setenv("GITHUB_ACTIONS", "true") + t.Setenv("OPENAI_BASE_URL", "https://api.openai.com") + t.Setenv("STRIPE_WEBHOOK_ENDPOINT", "https://example.com/webhook") + // Substring false positives: MONKEY contains KEY, DONKEY contains KEY. + t.Setenv("GITHUB_MONKEY", "banana") + t.Setenv("OPENAI_DONKEY", "hee-haw") + + s := newScannerWithHome(t.TempDir()) + result := s.Scan() + + for _, f := range result.Findings { + switch f.Resource { + case "GITHUB_WORKSPACE", "GITHUB_ACTIONS", "OPENAI_BASE_URL", "STRIPE_WEBHOOK_ENDPOINT", + "GITHUB_MONKEY", "OPENAI_DONKEY": + t.Errorf("%s should not be flagged (provider keyword without credential suffix)", f.Resource) + } + } +} diff --git a/internal/scan/local.go b/internal/scan/local.go index 4e93a6f..e55a1af 100644 --- a/internal/scan/local.go +++ b/internal/scan/local.go @@ -10,6 +10,9 @@ import ( "github.com/Pringled/agentcheck/internal/models" ) +// K8SProdPatterns is the set of substrings that identify a Kubernetes context as production. +var K8SProdPatterns = []string{"prod", "production", "prd", "live"} + // toolCheck specifies a simple binary-outcome CLI tool check. // A check runs cmd and produces: // - confirmedFinding when rc == 0 (tool confirmed accessible/authenticated). 
diff --git a/internal/scan/scan.go b/internal/scan/scan.go index 79023aa..d27209a 100644 --- a/internal/scan/scan.go +++ b/internal/scan/scan.go @@ -148,76 +148,6 @@ func runScanner(sc Scanner) (result models.ScanResult) { return sc.Scan() } -// K8SProdPatterns is the set of substrings that identify a Kubernetes context as production. -var K8SProdPatterns = []string{"prod", "production", "prd", "live"} - -// HighRiskEnvKeys is the set of environment variable names that indicate -// high-value credentials are present in the shell. Values are NEVER read or logged; -// only the key name is used. -var HighRiskEnvKeys = map[string]bool{ - // AI / ML inference - "OPENAI_API_KEY": true, - "ANTHROPIC_API_KEY": true, - "COHERE_API_KEY": true, - "MISTRAL_API_KEY": true, - "REPLICATE_API_KEY": true, - "HUGGINGFACE_TOKEN": true, - "HF_TOKEN": true, // Hugging Face canonical short name (used by huggingface-hub) - "TOGETHER_API_KEY": true, - "GROQ_API_KEY": true, - "VOYAGE_API_KEY": true, - "ELEVEN_LABS_API_KEY": true, - "PINECONE_API_KEY": true, - - // Cloud: env-based credentials - "AWS_ACCESS_KEY_ID": true, - "AWS_SECRET_ACCESS_KEY": true, - "AWS_SESSION_TOKEN": true, - "GOOGLE_APPLICATION_CREDENTIALS": true, - "AZURE_CLIENT_SECRET": true, - "AZURE_CLIENT_ID": true, - "AZURE_TENANT_ID": true, - - // Source control & CI/CD - "GITHUB_TOKEN": true, - "GITLAB_TOKEN": true, - "BITBUCKET_APP_PASSWORD": true, - "NPM_TOKEN": true, - "PYPI_API_TOKEN": true, - - // Payment - "STRIPE_SECRET_KEY": true, - "BRAINTREE_PRIVATE_KEY": true, - "PAYPAL_CLIENT_SECRET": true, - "SQUARE_ACCESS_TOKEN": true, - - // Messaging & comms (can exfiltrate data at scale) - "TWILIO_AUTH_TOKEN": true, - "SENDGRID_API_KEY": true, - "MAILGUN_API_KEY": true, - "SLACK_BOT_TOKEN": true, - "DISCORD_BOT_TOKEN": true, - - // Identity & auth - "OKTA_API_TOKEN": true, - "AUTH0_CLIENT_SECRET": true, - - // Observability & infra - "DATADOG_API_KEY": true, - "SENTRY_AUTH_TOKEN": true, - "VERCEL_TOKEN": true, - 
"NETLIFY_AUTH_TOKEN": true, - "CLOUDFLARE_API_TOKEN": true, - "HEROKU_API_KEY": true, - "RAILWAY_TOKEN": true, - "FLY_API_TOKEN": true, - - // Databases (connection strings often embed credentials) - "DATABASE_URL": true, - "MONGODB_URI": true, - "REDIS_URL": true, -} - // Summarise computes a Summary from a slice of ScanResults. // UNCERTAIN findings contribute to the uncertain count but not to findings_total, // since they represent incomplete checks rather than confirmed findings.