Skip to content

Commit

Permalink
enable gitlab
Browse files Browse the repository at this point in the history
  • Loading branch information
dogancanbakir committed Aug 23, 2023
1 parent 2acb1b3 commit d49a95b
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 25 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ shodan:
github:
- ghp_lkyJGU3jv1xmwk4SDXavrLDJ4dl2pSJMzj4X
- ghp_gkUuhkIYdQPj13ifH4KA3cXRn8JD2lqir2d4
gitlab:
- glpat-khpkq8eifH42Kski4Uzy
zoomeye:
- ZOOMEYE_USERNAME:ZOOMEYE_PASSWORD
quake:
Expand Down
2 changes: 2 additions & 0 deletions v2/pkg/passive/sources.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/fofa"
"github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/fullhunt"
"github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/github"
"github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/gitlab"
"github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/hackertarget"
"github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/hunter"
"github.com/projectdiscovery/subfinder/v2/pkg/subscraping/sources/intelx"
Expand Down Expand Up @@ -91,6 +92,7 @@ var AllSources = [...]subscraping.Source{
&facebook.Source{},
// &threatminer.Source{}, // failing api
// &reconcloud.Source{}, // failing due to cloudflare bot protection
&gitlab.Source{},
}

var NameSourceMap = make(map[string]subscraping.Source, len(AllSources))
Expand Down
1 change: 1 addition & 0 deletions v2/pkg/passive/sources_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ var (
"facebook",
// "threatminer",
// "reconcloud",
"gitlab",
}

expectedDefaultSources = []string{
Expand Down
2 changes: 1 addition & 1 deletion v2/pkg/runner/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,7 @@ func userHomeDir() string {

var defaultRateLimits = []string{
"github=30/m",
// "gitlab=2000/m",
"gitlab=10/m",
"fullhunt=60/m",
fmt.Sprintf("robtex=%d/ms", uint(math.MaxUint)),
"securitytrails=1/s",
Expand Down
81 changes: 57 additions & 24 deletions v2/pkg/subscraping/sources/gitlab/gitlab.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,39 +18,42 @@ import (

// Source is the passive scraping agent
type Source struct {
apiKeys []string
}

type item struct {
Data string `json:"data"`
ProjectId int `json:"project_id"`
Path string `json:"path"`
Ref string `json:"ref"`
apiKeys []string
timeTaken time.Duration
errors int
results int
skipped bool
}

// Run function returns all subdomains found with the service
func (s *Source) Run(ctx context.Context, domain string, session *subscraping.Session) <-chan subscraping.Result {
results := make(chan subscraping.Result)
s.errors = 0
s.results = 0

// "wiki_blobs", "commits", "blobs", "notes" are moved to GitLab Premium in 13.9. ref: https://docs.gitlab.com/ee/api/search.html#scope-wiki_blobs
scopes := []string{"projects", "issues", "merge_requests", "milestones", "snippet_titles", "users"}
go func() {
defer close(results)
defer func(startTime time.Time) {
s.timeTaken = time.Since(startTime)
close(results)
}(time.Now())

randomApiKey := subscraping.PickRandom(s.apiKeys, s.Name())
if randomApiKey == "" {
return
}

headers := map[string]string{"PRIVATE-TOKEN": randomApiKey}

searchURL := fmt.Sprintf("https://gitlab.com/api/v4/search?scope=blobs&search=%s&per_page=100", domain)
s.enumerate(ctx, searchURL, domainRegexp(domain), headers, session, results)

for _, scope := range scopes {
searchURL := fmt.Sprintf("https://gitlab.com/api/v4/search?scope=%s&search=%s&order_by=created_at&sort=desc&per_page=1000", scope, domain)
s.enumerate(ctx, searchURL, scope, domainRegexp(domain), headers, session, results)
}
}()

return results
}

func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp *regexp.Regexp, headers map[string]string, session *subscraping.Session, results chan subscraping.Result) {
func (s *Source) enumerate(ctx context.Context, searchURL string, scope string, domainRegexp *regexp.Regexp, headers map[string]string, session *subscraping.Session, results chan subscraping.Result) {
select {
case <-ctx.Done():
return
Expand All @@ -60,13 +63,14 @@ func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp *
resp, err := session.Get(ctx, searchURL, "", headers)
if err != nil && resp == nil {
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
s.errors++
session.DiscardHTTPResponse(resp)
return
}

defer resp.Body.Close()

var items []item
var items []map[string]interface{}
err = jsoniter.NewDecoder(resp.Body).Decode(&items)
if err != nil {
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
Expand All @@ -77,15 +81,16 @@ func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp *
wg.Add(len(items))

for _, it := range items {
go func(item item) {
go func(item map[string]interface{}) {
defer wg.Done()
// The original item.Path causes 404 error because the Gitlab API is expecting the url encoded path
fileUrl := fmt.Sprintf("https://gitlab.com/api/v4/projects/%d/repository/files/%s/raw?ref=%s", item.ProjectId, url.QueryEscape(item.Path), item.Ref)
fileUrl := buildUrl(scope, item)
resp, err := session.Get(ctx, fileUrl, "", headers)
if err != nil {
if resp == nil || (resp != nil && resp.StatusCode != http.StatusNotFound) {
session.DiscardHTTPResponse(resp)

results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
s.errors++
return
}
}
Expand All @@ -99,11 +104,11 @@ func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp *
}
for _, subdomain := range domainRegexp.FindAllString(line, -1) {
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Subdomain, Value: subdomain}
s.results++
}
}
resp.Body.Close()
}
defer wg.Done()
}(it)
}

Expand All @@ -115,19 +120,38 @@ func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp *
nextURL, err := url.QueryUnescape(link.URL)
if err != nil {
results <- subscraping.Result{Source: s.Name(), Type: subscraping.Error, Error: err}
s.errors++
return
}

// TODO: hardcoded for testing, should be a source internal rate limit #718
time.Sleep(2 * time.Second)

s.enumerate(ctx, nextURL, domainRegexp, headers, session, results)
s.enumerate(ctx, nextURL, scope, domainRegexp, headers, session, results)
}
}

wg.Wait()
}

// buildUrl returns the URL to fetch content for a single search-result item,
// depending on which GitLab search scope produced it. For most scopes the
// item's own web_url is used directly; for "blobs" a raw-file API URL is
// constructed from the project id, file path and ref. An empty string is
// returned when the item lacks the fields needed for its scope.
func buildUrl(scope string, item map[string]interface{}) string {
	switch scope {
	case "projects", "issues", "merge_requests", "milestones", "snippet_titles", "users", "commits", "wiki_blobs":
		if webURL, ok := item["web_url"].(string); ok {
			return webURL
		}
	case "blobs":
		path, ok := item["path"].(string)
		if !ok {
			return ""
		}
		ref, ok := item["ref"].(string)
		if !ok {
			return ""
		}
		// encoding/json (and jsoniter, which mirrors it) decodes JSON numbers
		// into float64 when the destination is interface{}; asserting .(int)
		// here would always fail and silently drop every blob result.
		projectID, ok := item["project_id"].(float64)
		if !ok {
			return ""
		}
		// The GitLab API expects the file path URL-encoded, including slashes,
		// otherwise the raw-file endpoint responds with 404.
		encodedPath := strings.ReplaceAll(path, "/", "%2f")
		return fmt.Sprintf("https://gitlab.com/api/v4/projects/%d/repository/files/%s/raw?ref=%s", int(projectID), encodedPath, ref)
	}
	return ""
}

func domainRegexp(domain string) *regexp.Regexp {
rdomain := strings.ReplaceAll(domain, ".", "\\.")
return regexp.MustCompile("(\\w[a-zA-Z0-9][a-zA-Z0-9-\\.]*)" + rdomain)
Expand All @@ -153,3 +177,12 @@ func (s *Source) NeedsKey() bool {
// AddApiKeys stores the GitLab personal access tokens for this source; one is
// picked at random per enumeration. Note the slice is retained as-is (not
// copied), so callers should not mutate it afterwards.
func (s *Source) AddApiKeys(keys []string) {
	s.apiKeys = keys
}

// Statistics reports the counters collected during the last enumeration run:
// number of errors, number of results, elapsed time, and whether the source
// was skipped.
func (s *Source) Statistics() subscraping.Statistics {
	stats := subscraping.Statistics{
		TimeTaken: s.timeTaken,
		Skipped:   s.skipped,
		Results:   s.results,
		Errors:    s.errors,
	}
	return stats
}

0 comments on commit d49a95b

Please sign in to comment.