From 6537b327867861ec4eb90d07604f8bb79bdce88d Mon Sep 17 00:00:00 2001 From: Dogan Can Bakir <65292895+dogancanbakir@users.noreply.github.com> Date: Wed, 18 Sep 2024 12:20:53 +0300 Subject: [PATCH] fix negative stats (#1367) * fix negative stats * lint --------- Co-authored-by: Mzack9999 --- v2/pkg/runner/stats.go | 6 ++-- v2/pkg/subscraping/sources/bevigil/bevigil.go | 3 ++ .../sources/binaryedge/binaryedge.go | 3 +- .../sources/bufferover/bufferover.go | 2 +- .../sources/commoncrawl/commoncrawl.go | 2 ++ v2/pkg/subscraping/sources/crtsh/crtsh.go | 2 +- v2/pkg/subscraping/sources/facebook/ctlogs.go | 2 +- v2/pkg/subscraping/sources/gitlab/gitlab.go | 29 +++++++++++++++++-- v2/pkg/subscraping/sources/hunter/hunter.go | 3 ++ .../sources/threatbook/threatbook.go | 1 + 10 files changed, 45 insertions(+), 8 deletions(-) diff --git a/v2/pkg/runner/stats.go b/v2/pkg/runner/stats.go index 55f497a0a..d34b20f82 100644 --- a/v2/pkg/runner/stats.go +++ b/v2/pkg/runner/stats.go @@ -30,12 +30,14 @@ func printStatistics(stats map[string]subscraping.Statistics) { if len(lines) > 0 { gologger.Print().Msgf("\n Source Duration Results Errors\n%s\n", strings.Repeat("─", 56)) - gologger.Print().Msgf("%s\n", strings.Join(lines, "\n")) + gologger.Print().Msg(strings.Join(lines, "\n")) + gologger.Print().Msgf("\n") } if len(skipped) > 0 { gologger.Print().Msgf("\n The following sources were included but skipped...\n\n") - gologger.Print().Msgf("%s\n\n", strings.Join(skipped, "\n")) + gologger.Print().Msg(strings.Join(skipped, "\n")) + gologger.Print().Msgf("\n\n") } } diff --git a/v2/pkg/subscraping/sources/bevigil/bevigil.go b/v2/pkg/subscraping/sources/bevigil/bevigil.go index cb55ea056..893db4913 100644 --- a/v2/pkg/subscraping/sources/bevigil/bevigil.go +++ b/v2/pkg/subscraping/sources/bevigil/bevigil.go @@ -48,6 +48,7 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se }) if err != nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + s.errors++ session.DiscardHTTPResponse(resp) return } @@ -57,6 +58,7 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se err = jsoniter.NewDecoder(resp.Body).Decode(&response) if err != nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + s.errors++ resp.Body.Close() return } @@ -69,6 +71,7 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se for _, subdomain := range subdomains { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain} + s.results++ } }() diff --git a/v2/pkg/subscraping/sources/binaryedge/binaryedge.go b/v2/pkg/subscraping/sources/binaryedge/binaryedge.go index 7684ff602..d798fe702 100644 --- a/v2/pkg/subscraping/sources/binaryedge/binaryedge.go +++ b/v2/pkg/subscraping/sources/binaryedge/binaryedge.go @@ -3,6 +3,7 @@ package binaryedge import ( "context" + "errors" "fmt" "math" "net/url" @@ -119,7 +120,7 @@ func (s *Source) enumerate(ctx context.Context, session *subscraping.Session, ba // Check error messages if response.Message != "" && response.Status != nil { - results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: fmt.Errorf("%s", response.Message)} + results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: errors.New(response.Message)} s.errors++ return } diff --git a/v2/pkg/subscraping/sources/bufferover/bufferover.go b/v2/pkg/subscraping/sources/bufferover/bufferover.go index cf2771436..bbfc884a3 100644 --- a/v2/pkg/subscraping/sources/bufferover/bufferover.go +++ b/v2/pkg/subscraping/sources/bufferover/bufferover.go @@ -97,8 +97,8 @@ func (s *Source) getData(ctx context.Context, sourceURL string, apiKey string, s for _, subdomain := range subdomains { for _, value := range session.Extractor.Extract(subdomain) { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: value} + s.results++ } - s.results++ } } diff --git a/v2/pkg/subscraping/sources/commoncrawl/commoncrawl.go b/v2/pkg/subscraping/sources/commoncrawl/commoncrawl.go index de46bbb7f..34e50be9e 100644 --- a/v2/pkg/subscraping/sources/commoncrawl/commoncrawl.go +++ b/v2/pkg/subscraping/sources/commoncrawl/commoncrawl.go @@ -131,6 +131,7 @@ func (s *Source) getSubdomains(ctx context.Context, searchURL, domain string, se resp, err := session.Get(ctx, fmt.Sprintf("%s?url=*.%s", searchURL, domain), "", headers) if err != nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + s.errors++ session.DiscardHTTPResponse(resp) return false } @@ -150,6 +151,7 @@ func (s *Source) getSubdomains(ctx context.Context, searchURL, domain string, se subdomain = strings.TrimPrefix(subdomain, "2f") results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain} + s.results++ } } } diff --git a/v2/pkg/subscraping/sources/crtsh/crtsh.go b/v2/pkg/subscraping/sources/crtsh/crtsh.go index e921bb017..4d008349f 100644 --- a/v2/pkg/subscraping/sources/crtsh/crtsh.go +++ b/v2/pkg/subscraping/sources/crtsh/crtsh.go @@ -156,8 +156,8 @@ func (s *Source) getSubdomainsFromHTTP(ctx context.Context, domain string, sessi for _, value := range session.Extractor.Extract(sub) { if value != "" { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: value} + s.results++ } - s.results++ } } } diff --git a/v2/pkg/subscraping/sources/facebook/ctlogs.go b/v2/pkg/subscraping/sources/facebook/ctlogs.go index 9a94a3c3f..3f19af736 100644 --- a/v2/pkg/subscraping/sources/facebook/ctlogs.go +++ b/v2/pkg/subscraping/sources/facebook/ctlogs.go @@ -122,8 +122,8 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se } for _, v := range response.Data { for _, domain := range v.Domains { - s.results++ results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: domain} + s.results++ } } if response.Paging.Next == "" { diff --git a/v2/pkg/subscraping/sources/gitlab/gitlab.go b/v2/pkg/subscraping/sources/gitlab/gitlab.go index 82540adf9..9477b0bdf 100644 --- a/v2/pkg/subscraping/sources/gitlab/gitlab.go +++ b/v2/pkg/subscraping/sources/gitlab/gitlab.go @@ -9,6 +9,7 @@ import ( "regexp" "strings" "sync" + "time" jsoniter "github.com/json-iterator/go" "github.com/projectdiscovery/subfinder/v2/pkg/subscraping" @@ -17,7 +18,11 @@ import ( // Source is the passive scraping agent type Source struct { - apiKeys []string + apiKeys []string + timeTaken time.Duration + errors int + results int + skipped bool } type item struct { @@ -30,9 +35,14 @@ type item struct { // Run function returns all subdomains found with the service func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { results := make(chan subscraping.Result) + s.errors = 0 + s.results = 0 go func() { - defer close(results) + defer func(startTime time.Time) { + s.timeTaken = time.Since(startTime) + close(results) + }(time.Now()) randomApiKey := subscraping.PickRandom(s.apiKeys, s.Name()) if randomApiKey == "" { @@ -59,6 +69,7 @@ func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp * resp, err := session.Get(ctx, searchURL, "", headers) if err != nil && resp == nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + s.errors++ session.DiscardHTTPResponse(resp) return } @@ -69,6 +80,7 @@ func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp * err = jsoniter.NewDecoder(resp.Body).Decode(&items) if err != nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + s.errors++ return } @@ -85,6 +97,7 @@ func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp * session.DiscardHTTPResponse(resp) results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + s.errors++ return } } @@ -98,6 +111,7 @@ func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp * } for _, subdomain := range domainRegexp.FindAllString(line, -1) { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain} + s.results++ } } resp.Body.Close() @@ -114,6 +128,7 @@ func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp * nextURL, err := url.QueryUnescape(link.URL) if err != nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + s.errors++ return } @@ -149,3 +164,13 @@ func (s *Source) NeedsKey() bool { func (s *Source) AddApiKeys(keys []string) { s.apiKeys = keys } + +// Statistics returns the statistics for the source +func (s *Source) Statistics() subscraping.Statistics { + return subscraping.Statistics{ + Errors: s.errors, + Results: s.results, + TimeTaken: s.timeTaken, + Skipped: s.skipped, + } +} diff --git a/v2/pkg/subscraping/sources/hunter/hunter.go b/v2/pkg/subscraping/sources/hunter/hunter.go index efd68bb40..dc3b77021 100644 --- a/v2/pkg/subscraping/sources/hunter/hunter.go +++ b/v2/pkg/subscraping/sources/hunter/hunter.go @@ -72,6 +72,7 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se err = jsoniter.NewDecoder(resp.Body).Decode(&response) if err != nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + s.errors++ resp.Body.Close() return } @@ -81,6 +82,7 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se results <- subscraping.Result{ Source: s.Name(), Type: subscraping.Error, Error: fmt.Errorf("%s", response.Message), } + s.errors++ return } @@ -88,6 +90,7 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se for _, hunterInfo := range response.Data.InfoArr { subdomain := hunterInfo.Domain results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain} + s.results++ } } pages = int(response.Data.Total/1000) + 1 diff --git a/v2/pkg/subscraping/sources/threatbook/threatbook.go b/v2/pkg/subscraping/sources/threatbook/threatbook.go index 1befbd983..1c10a8d10 100644 --- a/v2/pkg/subscraping/sources/threatbook/threatbook.go +++ b/v2/pkg/subscraping/sources/threatbook/threatbook.go @@ -88,6 +88,7 @@ func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Se if total > 0 { for _, subdomain := range response.Data.SubDomains.Data { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain} + s.results++ } } }()