diff --git a/README.md b/README.md index e3360cace..0b217390a 100644 --- a/README.md +++ b/README.md @@ -147,6 +147,8 @@ shodan: github: - ghp_lkyJGU3jv1xmwk4SDXavrLDJ4dl2pSJMzj4X - ghp_gkUuhkIYdQPj13ifH4KA3cXRn8JD2lqir2d4 +gitlab: + - glpat-khpkq8eifH42Kski4Uzy zoomeye: - ZOOMEYE_USERNAME:ZOOMEYE_PASSWORD quake: diff --git a/v2/pkg/passive/sources.go b/v2/pkg/passive/sources.go index 700190464..ef74fc635 100644 --- a/v2/pkg/passive/sources.go +++ b/v2/pkg/passive/sources.go @@ -28,6 +28,7 @@ import ( "github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/fofa" "github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/fullhunt" "github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/github" + "github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/gitlab" "github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/hackertarget" "github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/hunter" "github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/intelx" @@ -91,6 +92,7 @@ var AllSources = [...]subscraping.Source{ &facebook.Source{}, // &threatminer.Source{}, // failing api // &reconcloud.Source{}, // failing due to cloudflare bot protection + &gitlab.Source{}, } var NameSourceMap = make(map[string]subscraping.Source, len(AllSources)) diff --git a/v2/pkg/passive/sources_test.go b/v2/pkg/passive/sources_test.go index 34323c1af..ecaee8a16 100644 --- a/v2/pkg/passive/sources_test.go +++ b/v2/pkg/passive/sources_test.go @@ -52,6 +52,7 @@ var ( "facebook", // "threatminer", // "reconcloud", + "gitlab", } expectedDefaultSources = []string{ diff --git a/v2/pkg/runner/options.go b/v2/pkg/runner/options.go index 9f1219011..961110abb 100644 --- a/v2/pkg/runner/options.go +++ b/v2/pkg/runner/options.go @@ -249,7 +249,7 @@ func userHomeDir() string { var defaultRateLimits = []string{ "github=30/m", - // "gitlab=2000/m", + "gitlab=10/m", "fullhunt=60/m", fmt.Sprintf("robtex=%d/ms", uint(math.MaxUint)), 
"securitytrails=1/s", diff --git a/v2/pkg/subscraping/sources/gitlab/gitlab.go b/v2/pkg/subscraping/sources/gitlab/gitlab.go index 2dc76634a..8005414de 100644 --- a/v2/pkg/subscraping/sources/gitlab/gitlab.go +++ b/v2/pkg/subscraping/sources/gitlab/gitlab.go @@ -18,39 +18,42 @@ import ( // Source is the passive scraping agent type Source struct { - apiKeys []string -} - -type item struct { - Data string `json:"data"` - ProjectId int `json:"project_id"` - Path string `json:"path"` - Ref string `json:"ref"` + apiKeys []string + timeTaken time.Duration + errors int + results int + skipped bool } // Run function returns all subdomains found with the service func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result { results := make(chan subscraping.Result) + s.errors = 0 + s.results = 0 + // "wiki_blobs", "commits", "blobs", "notes" are moved to GitLab Premium in 13.9. ref: https://docs.gitlab.com/ee/api/search.html#scope-wiki_blobs + scopes := []string{"projects", "issues", "merge_requests", "milestones", "snippet_titles", "users"} go func() { - defer close(results) + defer func(startTime time.Time) { + s.timeTaken = time.Since(startTime) + close(results) + }(time.Now()) randomApiKey := subscraping.PickRandom(s.apiKeys, s.Name()) if randomApiKey == "" { return } - headers := map[string]string{"PRIVATE-TOKEN": randomApiKey} - - searchURL := fmt.Sprintf("https://gitlab.com/api/v4/search?scope=blobs&search=%s&per_page=100", domain) - s.enumerate(ctx, searchURL, domainRegexp(domain), headers, session, results) - + for _, scope := range scopes { + searchURL := fmt.Sprintf("https://gitlab.com/api/v4/search?scope=%s&search=%s&order_by=created_at&sort=desc&per_page=100", scope, domain) + s.enumerate(ctx, searchURL, scope, domainRegexp(domain), headers, session, results) + } }() return results } -func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp *regexp.Regexp, headers map[string]string, 
session *subscraping.Session, results chan subscraping.Result) { +func (s *Source) enumerate(ctx context.Context, searchURL string, scope string, domainRegexp *regexp.Regexp, headers map[string]string, session *subscraping.Session, results chan subscraping.Result) { select { case <-ctx.Done(): return @@ -60,13 +63,14 @@ func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp * resp, err := session.Get(ctx, searchURL, "", headers) if err != nil && resp == nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + s.errors++ session.DiscardHTTPResponse(resp) return } defer resp.Body.Close() - var items []item + var items []map[string]interface{} err = jsoniter.NewDecoder(resp.Body).Decode(&items) if err != nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} @@ -77,15 +81,16 @@ func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp * wg.Add(len(items)) for _, it := range items { - go func(item item) { + go func(item map[string]interface{}) { + defer wg.Done() // The original item.Path causes 404 error because the Gitlab API is expecting the url encoded path - fileUrl := fmt.Sprintf("https://gitlab.com/api/v4/projects/%d/repository/files/%s/raw?ref=%s", item.ProjectId, url.QueryEscape(item.Path), item.Ref) + fileUrl := buildUrl(scope, item) resp, err := session.Get(ctx, fileUrl, "", headers) if err != nil { if resp == nil || (resp != nil && resp.StatusCode != http.StatusNotFound) { session.DiscardHTTPResponse(resp) - results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + s.errors++ return } } @@ -99,11 +104,11 @@ func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp * } for _, subdomain := range domainRegexp.FindAllString(line, -1) { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain} + s.results++ } } resp.Body.Close() } - defer wg.Done() }(it) } @@ 
-115,19 +120,41 @@ func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp * nextURL, err := url.QueryUnescape(link.URL) if err != nil { results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err} + s.errors++ return } - // TODO: hardcoded for testing, should be a source internal rate limit #718 - time.Sleep(2 * time.Second) - - s.enumerate(ctx, nextURL, domainRegexp, headers, session, results) + s.enumerate(ctx, nextURL, scope, domainRegexp, headers, session, results) } } wg.Wait() } +// buildUrl returns the URL to fetch for one search hit: the item's web_url for the listed scopes, or the raw file API URL for the "blobs" scope. +// It returns "" when the item lacks the fields required for its scope. +func buildUrl(scope string, item map[string]interface{}) string { + if webURL, ok := item["web_url"].(string); ok { + switch scope { + case "projects", "issues", "merge_requests", "milestones", "snippet_titles", "users", "commits", "wiki_blobs": + return webURL + } + } + + if scope == "blobs" { + if path, ok := item["path"].(string); ok { + // Decoded JSON numbers arrive as float64 inside interface{} values, so an int assertion never matches. + if projectID, ok := item["project_id"].(float64); ok { + if ref, ok := item["ref"].(string); ok { + // The repository files API expects the whole file path percent-encoded as a single path segment. + return fmt.Sprintf("https://gitlab.com/api/v4/projects/%d/repository/files/%s/raw?ref=%s", int64(projectID), url.PathEscape(path), url.QueryEscape(ref)) + } + } + } + } + return "" +} + func domainRegexp(domain string) *regexp.Regexp { rdomain := strings.ReplaceAll(domain, ".", "\\.") return regexp.MustCompile("(\\w[a-zA-Z0-9][a-zA-Z0-9-\\.]*)" + rdomain) @@ -153,3 +180,13 @@ func (s *Source) NeedsKey() bool { func (s *Source) AddApiKeys(keys []string) { s.apiKeys = keys } + +// Statistics returns the error count, result count, elapsed time and skipped flag currently held on the Source. +func (s *Source) Statistics() subscraping.Statistics { + return subscraping.Statistics{ + Errors: s.errors, + Results: s.results, + TimeTaken: s.timeTaken, + Skipped: s.skipped, + } +}