diff --git a/README.md b/README.md index 441750d4..83e252ea 100644 --- a/README.md +++ b/README.md @@ -129,6 +129,7 @@ CONFIGURATIONS: -r, -resolver string list of resolvers to use (file or comma separated) -wt, -wildcard-threshold int wildcard filter threshold (default 5) -wd, -wildcard-domain string domain name for wildcard filtering (other flags will be ignored - only json output is supported) + -auto-wildcard automatically detect and filter wildcard subdomains per base domain ``` ## Running dnsx @@ -405,6 +406,12 @@ A special feature of `dnsx` is its ability to handle **multi-level DNS based wil dnsx -l subdomain_list.txt -wd airbnb.com -o output.txt ``` +Automatically detect and filter wildcard subdomains per base domain in a single run: + +```console +dnsx -l subdomain_list.txt --auto-wildcard -o output.txt +``` + --------- ### Dnsx as a library @@ -462,7 +469,7 @@ func main() { - As default, `dnsx` checks for **A** record. - As default `dnsx` uses Google, Cloudflare, Quad9 [resolver](https://github.com/projectdiscovery/dnsx/blob/43af78839e237ea8cbafe571df1ab0d6cbe7f445/libs/dnsx/dnsx.go#L31). - Custom resolver list can be loaded using the `r` flag. -- Domain name (`wd`) input is mandatory for wildcard elimination. +- Domain name (`wd`) input is mandatory for wildcard elimination unless `--auto-wildcard` is used. - DNS record flag can not be used when using wildcard filtering. - DNS resolution (`l`) and DNS brute-forcing (`w`) can't be used together. - VPN operators tend to filter high DNS/UDP traffic, therefore the tool might experience packets loss (eg. [Mullvad VPN](https://github.com/projectdiscovery/dnsx/issues/221)). Check [this potential solution](./MULLVAD.md). diff --git a/internal/runner/options.go b/internal/runner/options.go index 0e545bd5..afce61a5 100644 --- a/internal/runner/options.go +++ b/internal/runner/options.go @@ -59,6 +59,7 @@ type Options struct { TraceMaxRecursion int WildcardThreshold int WildcardDomain string + AutoWildcard bool ShowStatistics bool rcodes map[int]struct{} RCode string @@ -189,6 +190,7 @@ func ParseOptions() *Options { flagSet.StringVarP(&options.Resolvers, "resolver", "r", "", "list of resolvers to use (file or comma separated)"), flagSet.IntVarP(&options.WildcardThreshold, "wildcard-threshold", "wt", 5, "wildcard filter threshold"), flagSet.StringVarP(&options.WildcardDomain, "wildcard-domain", "wd", "", "domain name for wildcard filtering (other flags will be ignored - only json output is supported)"), + flagSet.BoolVar(&options.AutoWildcard, "auto-wildcard", false, "automatically detect and filter wildcard subdomains per base domain"), flagSet.StringVar(&options.Proxy, "proxy", "", "proxy to use (eg socks5://127.0.0.1:8080)"), ) @@ -307,10 +309,17 @@ func (options *Options) validateOptions() { if options.WildcardDomain != "" { gologger.Fatal().Msgf("wildcard not supported in stream mode") } + if options.AutoWildcard { + gologger.Fatal().Msgf("auto wildcard not supported in stream mode") + } if options.ShowStatistics { gologger.Fatal().Msgf("stats not supported in stream mode") } } + + if options.WildcardDomain != "" && options.AutoWildcard { + gologger.Fatal().Msgf("wildcard-domain and auto-wildcard can't be used together") + } } func argumentHasStdin(arg string) bool { diff --git a/internal/runner/runner.go b/internal/runner/runner.go index c98e831c..83a5d45b 100644 --- a/internal/runner/runner.go +++ b/internal/runner/runner.go @@ -6,6 +6,7 @@ import ( "encoding/json" "fmt" "io" + "net" "os" "strings" "sync" @@ -39,7 +40,7 @@ type Runner struct { wgwildcardworker *sync.WaitGroup workerchan chan string outputchan chan string - wildcardworkerchan chan string + wildcardworkerchan chan wildcardJob wildcards *mapsutil.SyncLockMap[string, struct{}] wildcardscache map[string][]string wildcardscachemutex sync.Mutex @@ -50,6 +51,11 @@ type Runner struct { aurora aurora.Aurora } +type wildcardJob struct { + host string + wildcardDomain string +} + func New(options *Options) (*Runner, error) { retryabledns.CheckInternalIPs = true @@ -115,9 +121,11 @@ func New(options *Options) (*Runner, error) { } // If no option is specified or wildcard filter has been requested use query type A - if len(questionTypes) == 0 || options.WildcardDomain != "" { + if len(questionTypes) == 0 || options.WildcardDomain != "" || options.AutoWildcard { options.A = true - questionTypes = append(questionTypes, dns.TypeA) + if !hasQuestionType(questionTypes, dns.TypeA) { + questionTypes = append(questionTypes, dns.TypeA) + } } dnsxOptions.QuestionTypes = questionTypes dnsxOptions.QueryAll = options.QueryAll @@ -156,7 +164,7 @@ func New(options *Options) (*Runner, error) { wgresolveworkers: &sync.WaitGroup{}, wgwildcardworker: &sync.WaitGroup{}, workerchan: make(chan string), - wildcardworkerchan: make(chan string), + wildcardworkerchan: make(chan wildcardJob), wildcards: mapsutil.NewSyncLockMap[string, struct{}](), wildcardscache: make(map[string][]string), limiter: limiter, @@ -467,35 +475,85 @@ func (r *Runner) run() error { close(r.outputchan) r.wgoutputworker.Wait() + if r.wildcardFilteringEnabled() { + r.filterWildcards() + } + + return nil +} + +func (r *Runner) wildcardFilteringEnabled() bool { + return r.options.WildcardDomain != "" || r.options.AutoWildcard +} + +func (r *Runner) wildcardDomainForHost(host string) string { if r.options.WildcardDomain != "" { - gologger.Print().Msgf("Starting to filter wildcard subdomains\n") - ipDomain := make(map[string]map[string]struct{}) - listIPs := []string{} - // prepare in memory structure similarly to shuffledns - r.hm.Scan(func(k, v []byte) error { - var dnsdata retryabledns.DNSData - if err := json.Unmarshal(v, &dnsdata); err != nil { - // the item has no record - ignore - return nil - } + return r.options.WildcardDomain + } + if r.options.AutoWildcard { + return wildcardBaseDomain(host) + } + return "" +} - for _, a := range dnsdata.A { - _, ok := ipDomain[a] - if !ok { - ipDomain[a] = make(map[string]struct{}) - listIPs = append(listIPs, a) +func (r *Runner) filterWildcards() { + gologger.Print().Msgf("Starting to filter wildcard subdomains\n") + + type hostSet map[string]struct{} + type ipHosts map[string]hostSet + + domainIPHosts := make(map[string]ipHosts) + domainIPs := make(map[string][]string) + unfilteredHosts := make(map[string]struct{}) + ambiguousHosts := make(map[string]struct{}) + + // prepare in memory structure similarly to shuffledns + r.hm.Scan(func(k, v []byte) error { + var dnsdata retryabledns.DNSData + if err := json.Unmarshal(v, &dnsdata); err != nil { + // the item has no record - ignore + return nil + } + + wildcardDomain := r.wildcardDomainForHost(dnsdata.Host) + if wildcardDomain == "" { + if r.options.AutoWildcard { + if net.ParseIP(dnsdata.Host) != nil || !strings.Contains(dnsdata.Host, ".") { + unfilteredHosts[dnsdata.Host] = struct{}{} + } else { + ambiguousHosts[dnsdata.Host] = struct{}{} } - ipDomain[a][string(k)] = struct{}{} } - return nil - }) + } - gologger.Debug().Msgf("Found %d unique IPs:%s\n", len(listIPs), strings.Join(listIPs, ", ")) + for _, a := range dnsdata.A { + ipDomain, ok := domainIPHosts[wildcardDomain] + if !ok { + ipDomain = make(map[string]hostSet) + domainIPHosts[wildcardDomain] = ipDomain + } + if _, ok := ipDomain[a]; !ok { + ipDomain[a] = make(hostSet) + domainIPs[wildcardDomain] = append(domainIPs[wildcardDomain], a) + } + ipDomain[a][string(k)] = struct{}{} + } + + return nil + }) + + totalIPs := 0 + for _, ips := range domainIPs { + totalIPs += len(ips) + } + + if totalIPs > 0 { + gologger.Debug().Msgf("Found %d unique IPs\n", totalIPs) // wildcard workers numThreads := r.options.Threads - if numThreads > len(listIPs) { - numThreads = len(listIPs) + if numThreads > totalIPs { + numThreads = totalIPs } for i := 0; i < numThreads; i++ { r.wgwildcardworker.Add(1) @@ -503,37 +561,63 @@ func (r *Runner) run() error { } seen := make(map[string]struct{}) - for _, a := range listIPs { - hosts := ipDomain[a] - if len(hosts) >= r.options.WildcardThreshold { - for host := range hosts { - if _, ok := seen[host]; !ok { - seen[host] = struct{}{} - r.wildcardworkerchan <- host + for wildcardDomain, ipDomain := range domainIPHosts { + for _, hosts := range ipDomain { + if len(hosts) >= r.options.WildcardThreshold { + for host := range hosts { + if _, ok := seen[host]; !ok { + seen[host] = struct{}{} + r.wildcardworkerchan <- wildcardJob{host: host, wildcardDomain: wildcardDomain} + } } } } } close(r.wildcardworkerchan) r.wgwildcardworker.Wait() + } else { + close(r.wildcardworkerchan) + } - // we need to restart output - r.startOutputWorker() - seen = make(map[string]struct{}) - seenRemovedSubdomains := make(map[string]struct{}) - numRemovedSubdomains := 0 - for _, A := range listIPs { - for host := range ipDomain[A] { - if host == r.options.WildcardDomain { - if _, ok := seen[host]; !ok { - seen[host] = struct{}{} - _ = r.lookupAndOutput(host) - } + // we need to restart output + r.startOutputWorker() + seen := make(map[string]struct{}) + seenRemovedSubdomains := make(map[string]struct{}) + numRemovedSubdomains := 0 + + for host := range unfilteredHosts { + if _, ok := seen[host]; ok { + continue + } + seen[host] = struct{}{} + if r.options.AutoWildcard { + wildcardDomain := wildcardBaseDomain(host) + if wildcardDomain == "" { + if net.ParseIP(host) != nil || !strings.Contains(host, ".") { + _ = r.lookupAndOutput(host) + } else { + ambiguousHosts[host] = struct{}{} + } + } else { + ambiguousHosts[host] = struct{}{} + } + continue + } + _ = r.lookupAndOutput(host) + } + + for wildcardDomain, ips := range domainIPs { + ipDomain := domainIPHosts[wildcardDomain] + for _, ip := range ips { + for host := range ipDomain[ip] { + if _, ok := seen[host]; ok { + continue + } + seen[host] = struct{}{} + if host == wildcardDomain { + _ = r.lookupAndOutput(host) } else if !r.wildcards.Has(host) { - if _, ok := seen[host]; !ok { - seen[host] = struct{}{} - _ = r.lookupAndOutput(host) - } + _ = r.lookupAndOutput(host) } else { if _, ok := seenRemovedSubdomains[host]; !ok { numRemovedSubdomains++ @@ -542,13 +626,23 @@ func (r *Runner) run() error { } } } - close(r.outputchan) - // waiting output worker - r.wgoutputworker.Wait() - gologger.Print().Msgf("%d wildcard subdomains removed\n", numRemovedSubdomains) } + close(r.outputchan) + // waiting output worker + r.wgoutputworker.Wait() + if r.options.AutoWildcard && len(ambiguousHosts) > 0 { + gologger.Debug().Msgf("Skipped %d ambiguous hosts during auto wildcard filtering\n", len(ambiguousHosts)) + } + gologger.Print().Msgf("%d wildcard subdomains removed\n", numRemovedSubdomains) +} - return nil +func hasQuestionType(questionTypes []uint16, value uint16) bool { + for _, questionType := range questionTypes { + if questionType == value { + return true + } + } + return false } func (r *Runner) lookupAndOutput(host string) error { @@ -731,7 +825,7 @@ func (r *Runner) worker() { } } // if wildcard filtering just store the data - if r.options.WildcardDomain != "" { + if r.wildcardFilteringEnabled() { if err := r.storeDNSData(dnsData.DNSData); err != nil { gologger.Debug().Msgf("Failed to store DNS data for %s: %v\n", domain, err) } @@ -935,13 +1029,13 @@ func (r *Runner) wildcardWorker() { defer r.wgwildcardworker.Done() for { - host, more := <-r.wildcardworkerchan + job, more := <-r.wildcardworkerchan if !more { break } - if r.IsWildcard(host) { + if r.IsWildcard(job.host, job.wildcardDomain) { // mark this host as a wildcard subdomain - _ = r.wildcards.Set(host, struct{}{}) + _ = r.wildcards.Set(job.host, struct{}{}) } } } diff --git a/internal/runner/wildcard.go b/internal/runner/wildcard.go index 5fdfc6a4..ec423eac 100644 --- a/internal/runner/wildcard.go +++ b/internal/runner/wildcard.go @@ -3,11 +3,51 @@ package runner import ( "strings" + iputil "github.com/projectdiscovery/utils/ip" "github.com/rs/xid" ) +var commonSecondLevelDomains = map[string]struct{}{ + "ac": {}, + "co": {}, + "com": {}, + "edu": {}, + "gov": {}, + "mil": {}, + "net": {}, + "org": {}, +} + +func wildcardBaseDomain(host string) string { + host = strings.TrimSpace(strings.TrimSuffix(host, ".")) + if host == "" { + return "" + } + if iputil.IsIP(host) { + return "" + } + + labels := strings.Split(strings.ToLower(host), ".") + if len(labels) < 2 { + return "" + } + + last := labels[len(labels)-1] + second := labels[len(labels)-2] + if len(labels) >= 3 && len(last) == 2 { + if _, ok := commonSecondLevelDomains[second]; ok { + return strings.Join(labels[len(labels)-3:], ".") + } + } + + return strings.Join(labels[len(labels)-2:], ".") +} + // IsWildcard checks if a host is wildcard -func (r *Runner) IsWildcard(host string) bool { +func (r *Runner) IsWildcard(host, wildcardDomain string) bool { + if wildcardDomain == "" { + return false + } orig := make(map[string]struct{}) wildcards := make(map[string]struct{}) @@ -19,7 +59,7 @@ func (r *Runner) IsWildcard(host string) bool { orig[A] = struct{}{} } - subdomainPart := strings.TrimSuffix(host, "."+r.options.WildcardDomain) + subdomainPart := strings.TrimSuffix(host, "."+wildcardDomain) subdomainTokens := strings.Split(subdomainPart, ".") // Build an array by preallocating a slice of a length @@ -27,11 +67,11 @@ func (r *Runner) IsWildcard(host string) bool { // We use a rand prefix at the beginning like %rand%.domain.tld // A permutation is generated for each level of the subdomain. var hosts []string - hosts = append(hosts, r.options.WildcardDomain) + hosts = append(hosts, wildcardDomain) if len(subdomainTokens) > 0 { for i := 1; i < len(subdomainTokens); i++ { - newhost := strings.Join(subdomainTokens[i:], ".") + "." + r.options.WildcardDomain + newhost := strings.Join(subdomainTokens[i:], ".") + "." + wildcardDomain hosts = append(hosts, newhost) } } diff --git a/internal/runner/wildcard_test.go b/internal/runner/wildcard_test.go new file mode 100644 index 00000000..73399764 --- /dev/null +++ b/internal/runner/wildcard_test.go @@ -0,0 +1,26 @@ +package runner + +import "testing" + +func TestWildcardBaseDomain(t *testing.T) { + t.Parallel() + + tests := []struct { + input string + expected string + }{ + {input: "www.example.com", expected: "example.com"}, + {input: "Example.COM", expected: "example.com"}, + {input: "a.b.example.co.uk", expected: "example.co.uk"}, + {input: "example.co.uk", expected: "example.co.uk"}, + {input: "localhost", expected: ""}, + {input: "192.168.0.1", expected: ""}, + {input: "example.com.", expected: "example.com"}, + } + + for _, tt := range tests { + if got := wildcardBaseDomain(tt.input); got != tt.expected { + t.Errorf("wildcardBaseDomain(%q) = %q, want %q", tt.input, got, tt.expected) + } + } +}