Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adds support for authenticated crawling #310

Draft
wants to merge 2 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion cmd/katana/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,16 @@ pipelines offering both headless and non-headless crawling.`)
flagSet.BoolVarP(&options.ShowBrowser, "show-browser", "sb", false, "show the browser on the screen with headless mode"),
flagSet.StringSliceVarP(&options.HeadlessOptionalArguments, "headless-options", "ho", nil, "start headless chrome with additional options", goflags.FileCommaSeparatedStringSliceOptions),
flagSet.BoolVarP(&options.HeadlessNoSandbox, "no-sandbox", "nos", false, "start headless chrome in --no-sandbox mode"),
flagSet.StringVarP(&options.ChromeDataDir, "chrome-data-dir", "cdd", "", "path to store chrome browser data"),
flagSet.StringVarP(&options.SystemChromePath, "system-chrome-path", "scp", "", "use specified chrome browser for headless crawling"),
flagSet.BoolVarP(&options.HeadlessNoIncognito, "no-incognito", "noi", false, "start headless chrome without incognito mode"),
)

flagSet.CreateGroup("project", "Project",
flagSet.StringVarP(&options.NewProject, "new-project", "np", "", "creates new project and opens browser for manual authentication"),
flagSet.StringVarP(&options.CrawlProject, "crawl-project", "cp", "", "use project data while crawling (uses saved auth/session data)"),
flagSet.BoolVarP(&options.ListProject, "list-project", "lp", false, "list all stored projects"),
)

flagSet.CreateGroup("scope", "Scope",
flagSet.StringSliceVarP(&options.Scope, "crawl-scope", "cs", nil, "in scope url regex to be followed by crawler", goflags.FileCommaSeparatedStringSliceOptions),
flagSet.StringSliceVarP(&options.OutOfScope, "crawl-out-scope", "cos", nil, "out of scope url regex to be excluded by crawler", goflags.FileCommaSeparatedStringSliceOptions),
Expand Down
94 changes: 94 additions & 0 deletions internal/runner/executer.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,28 @@
package runner

import (
"fmt"
"log"
"net"
"net/http"
"net/url"
"os"
"strconv"

"github.com/go-rod/rod/lib/launcher"
"github.com/go-rod/rod/lib/utils"
"github.com/projectdiscovery/gologger"
errorutil "github.com/projectdiscovery/utils/errors"
"github.com/remeh/sizedwaitgroup"
)

// ExecuteCrawling executes the crawling main loop
func (r *Runner) ExecuteCrawling() error {
if r.options.NewProject != "" {
r.setupNewProject()
os.Exit(0)
}

inputs := r.parseInputs()
if len(inputs) == 0 {
return errorutil.New("no input provided for crawling")
Expand All @@ -30,3 +45,82 @@ func (r *Runner) ExecuteCrawling() error {
wg.Wait()
return nil
}

// setupNewProject opens a visible (non-headless) browser window so the user
// can authenticate manually against the target.
//
// It starts a rod launcher.Manager on a loopback-only HTTP listener, asks it
// for a managed launcher, points the browser profile at r.options.NewProject
// (with KeepUserDataDir requested) and then blocks until the user presses
// Enter on stdin. Any setup failure is reported through gologger.Fatal.
// The function returns nothing; the caller is expected to terminate the
// process afterwards.
func (r *Runner) setupNewProject() {
	// create manager instance which manages browser
	manager := launcher.NewManager()

	// Listen on a random free port, but only on the loopback interface: the
	// launcher below always dials 127.0.0.1, and the manager must not be
	// reachable from other hosts since it can start browsers on request.
	listener, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		gologger.Fatal().Label("project").Msgf("failed to setup listener for manager got %v", err)
	}
	tcpAddr, ok := listener.Addr().(*net.TCPAddr)
	if !ok {
		// Guessing a port here would make the launcher dial an address
		// nobody is listening on, so fail loudly instead.
		gologger.Fatal().Label("project").Msgf("failed to setup listener for manager got unexpected address %v", listener.Addr())
	}
	managerURL := "ws://127.0.0.1:" + strconv.Itoa(tcpAddr.Port)

	// start manager goroutine; http.Serve only returns on error
	go func() {
		log.Fatal(http.Serve(listener, manager))
	}()

	// open browser
	go func() {
		chromeLauncher, err := launcher.NewManaged(managerURL)
		if err != nil {
			gologger.Fatal().Label("project").Msgf("failed to create managed launcher got %v", err)
		}
		chromeLauncher.
			Leakless(true).
			Set("disable-gpu", "true").
			// Previously set twice ("true" then "1"); only the last value
			// took effect, so keep that one.
			Set("ignore-certificate-errors", "1").
			Set("disable-crash-reporter", "true").
			Set("disable-notifications", "true").
			Set("hide-scrollbars", "true").
			Set("window-size", fmt.Sprintf("%d,%d", 1080, 1920)).
			Set("mute-audio", "true").
			Delete("use-mock-keychain").
			UserDataDir(r.options.NewProject).
			KeepUserDataDir().
			Headless(false)

		if r.options.UseInstalledChrome {
			chromePath, hasChrome := launcher.LookPath()
			if !hasChrome {
				gologger.Fatal().Label("project").Msgf("chrome browser is not installed")
			}
			chromeLauncher.Bin(chromePath)
		}
		// An explicitly configured binary takes precedence over the
		// auto-detected one.
		if r.options.SystemChromePath != "" {
			chromeLauncher.Bin(r.options.SystemChromePath)
		}
		if r.options.HeadlessNoSandbox {
			chromeLauncher.Set("no-sandbox", "true")
		}
		// NOTE(review): the proxy is only applied when the headless option is
		// also set, although this window is always non-headless — confirm
		// this is intended.
		if r.options.Proxy != "" && r.options.Headless {
			proxyURL, err := url.Parse(r.options.Proxy)
			if err != nil {
				gologger.Fatal().Label("project").Msgf("failed to parse proxy url got %v", err)
			}
			chromeLauncher.Set("proxy-server", proxyURL.String())
		}

		if _, err := chromeLauncher.Launch(); err != nil {
			gologger.Fatal().Label("project").Msgf("failed to launch chromium got %v", err)
		}

		// Park this goroutine; the process ends when the caller exits after
		// setupNewProject returns.
		utils.Pause()
	}()

	fmt.Println("Started katana in New Project Mode. Follow below steps to complete creating new project")
	fmt.Println("1. You should now see a chromium window, if not locate it")
	fmt.Println("2. Login to your desired target in browser")
	fmt.Println("3. [Press Enter Key] to complete setup")

	// Wait for the user to press Enter. The result is deliberately ignored:
	// a bare newline makes Scanln report an error, which is the expected way
	// to continue here.
	_, _ = fmt.Scanln()
	gologger.Verbose().Msgf("new project setup completed")
}
37 changes: 34 additions & 3 deletions internal/runner/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,6 @@ func validateOptions(options *types.Options) error {
if options.Verbose {
gologger.DefaultLogger.SetMaxLevel(levels.LevelVerbose)
}
if len(options.URLs) == 0 && !fileutil.HasStdin() {
return errorutil.New("no inputs specified for crawler")
}
if (options.HeadlessOptionalArguments != nil || options.HeadlessNoSandbox || options.SystemChromePath != "") && !options.Headless {
return errorutil.New("headless mode (-hl) is required if -ho, -nos or -scp are set")
}
Expand All @@ -36,13 +33,39 @@ func validateOptions(options *types.Options) error {
return errorutil.New("specified system chrome binary does not exist")
}
}
if options.NewProject != "" && !filepath.IsAbs(options.NewProject) {
if _, err := os.Stat(getDefaultProjectDir()); err != nil {
// if default save directory does not exist create
if err := os.Mkdir(getDefaultProjectDir(), 0777); err != nil { //nolint
gologger.Fatal().Msgf("failed to create default root directory for katana got %v", err)
}
}
options.NewProject = filepath.Join(getDefaultProjectDir(), options.NewProject)
gologger.Verbose().Msgf("new project created at %v", options.NewProject)
}
if options.CrawlProject != "" {
if options.NewProject != "" {
gologger.Fatal().Msg("cannot create and crawl project at same time")
}
if !filepath.IsAbs(options.CrawlProject) {
// if not absolute path prepend default project directory
options.CrawlProject = filepath.Join(getDefaultProjectDir(), options.CrawlProject)
}
// check if project exists
if _, err := os.Stat(options.CrawlProject); err != nil {
gologger.Fatal().Msgf("project %v does not exist, try creating new project with `-np` flag", err)
}
}
if options.StoreResponseDir != "" && !options.StoreResponse {
gologger.Debug().Msgf("store response directory specified, enabling \"sr\" flag automatically\n")
options.StoreResponse = true
}
if options.Headless && (options.StoreResponse || options.StoreResponseDir != "") {
return errorutil.New("store responses feature is not supported in headless mode")
}
if len(options.URLs) == 0 && !fileutil.HasStdin() && options.NewProject == "" && !options.ListProject {
return errorutil.New("no inputs specified for crawler")
}
gologger.DefaultLogger.SetFormatter(formatter.NewCLI(options.NoColors))
return nil
}
Expand Down Expand Up @@ -123,3 +146,11 @@ func initExampleFormFillConfig() error {
err = yaml.NewEncoder(exampleConfig).Encode(utils.DefaultFormFillData)
return err
}

// getDefaultProjectDir returns the directory under which katana projects are
// stored by default: the ".katana" folder inside the current user's home
// directory. If the home directory cannot be determined the failure is
// reported through gologger.Fatal.
func getDefaultProjectDir() string {
	const projectDirName = ".katana"

	userHome, homeErr := os.UserHomeDir()
	if homeErr != nil {
		gologger.Fatal().Msgf("failed to fetch user home directory got %v", homeErr)
	}

	defaultDir := filepath.Join(userHome, projectDirName)
	return defaultDir
}
17 changes: 17 additions & 0 deletions internal/runner/runner.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package runner

import (
"fmt"
"os"

"github.com/projectdiscovery/gologger"
"github.com/projectdiscovery/katana/pkg/engine"
"github.com/projectdiscovery/katana/pkg/engine/hybrid"
Expand Down Expand Up @@ -41,6 +44,20 @@ func New(options *types.Options) (*Runner, error) {
return nil, err
}
}
if options.ListProject {
gologger.Info().Msg("katana saved projects:")
saveDir, err := os.ReadDir(getDefaultProjectDir())
if err != nil {
gologger.Fatal().Msgf("saved projects not found got %v", err)
}
for _, v := range saveDir {
if v.IsDir() {
fmt.Println(v.Name())
}
}
os.Exit(0)
}

crawlerOptions, err := types.NewCrawlerOptions(options)
if err != nil {
return nil, errorutil.NewWithErr(err).Msgf("could not create crawler options")
Expand Down
33 changes: 20 additions & 13 deletions pkg/engine/hybrid/hybrid.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,19 +43,17 @@ type Crawler struct {

// New returns a new standard crawler instance
func New(options *types.CrawlerOptions) (*Crawler, error) {
var dataStore string
var err error
if options.Options.ChromeDataDir != "" {
dataStore = options.Options.ChromeDataDir
} else {
dataStore, err = os.MkdirTemp("", "katana-*")
previousPIDs := findChromeProcesses()
var tempDir string = "" // empty if crawlproject is given

if options.Options.CrawlProject == "" {
var err error
tempDir, err = os.MkdirTemp("", "katana-*")
if err != nil {
return nil, errorutil.NewWithTag("hybrid", "could not create temporary directory").Wrap(err)
}
}

previousPIDs := findChromeProcesses()

chromeLauncher := launcher.New().
Leakless(false).
Set("disable-gpu", "true").
Expand All @@ -66,8 +64,13 @@ func New(options *types.CrawlerOptions) (*Crawler, error) {
Set("hide-scrollbars", "true").
Set("window-size", fmt.Sprintf("%d,%d", 1080, 1920)).
Set("mute-audio", "true").
Delete("use-mock-keychain").
UserDataDir(dataStore)
Delete("use-mock-keychain")

if options.Options.CrawlProject != "" {
chromeLauncher.UserDataDir(options.Options.CrawlProject)
} else {
chromeLauncher.UserDataDir(tempDir)
}

if options.Options.UseInstalledChrome {
if chromePath, hasChrome := launcher.LookPath(); hasChrome {
Expand Down Expand Up @@ -117,7 +120,7 @@ func New(options *types.CrawlerOptions) (*Crawler, error) {
options: options,
browser: browser,
previousPIDs: previousPIDs,
tempDir: dataStore,
tempDir: tempDir,
}
if options.Options.KnownFiles != "" {
httpclient, _, err := common.BuildClient(options.Dialer, options.Options, nil)
Expand All @@ -134,7 +137,7 @@ func (c *Crawler) Close() error {
if err := c.browser.Close(); err != nil {
return err
}
if c.options.Options.ChromeDataDir == "" {
if c.tempDir != "" {
if err := os.RemoveAll(c.tempDir); err != nil {
return err
}
Expand Down Expand Up @@ -179,7 +182,7 @@ func (c *Crawler) Crawl(rootURL string) error {

// create a new browser instance (default to incognito mode)
var newBrowser *rod.Browser
if c.options.Options.HeadlessNoIncognito {
if c.options.Options.HeadlessNoIncognito || c.options.Options.CrawlProject != "" {
if err := c.browser.Connect(); err != nil {
return err
}
Expand All @@ -203,6 +206,10 @@ func (c *Crawler) Crawl(rootURL string) error {
break
}
item := queue.Pop()
if item == nil {
// if no elements are present in queue
break
}
req, ok := item.(navigation.Request)
if !ok {
continue
Expand Down
8 changes: 6 additions & 2 deletions pkg/types/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -97,14 +97,18 @@ type Options struct {
StoreResponse bool
// StoreResponseDir specifies if katana should use a custom directory to store http requests/responses
StoreResponseDir string
// ChromeDataDir : Specify the --user-data-dir to chrome binary to preserve sessions
ChromeDataDir string
// HeadlessNoIncognito specifies if chrome should be started without incognito mode
HeadlessNoIncognito bool
// HealthCheck determines if a self-healthcheck should be performed
HealthCheck bool
// ErrorLogFile specifies a file to write with the errors of all requests
ErrorLogFile string
// NewProject names the project to create; a visible browser window is opened for manual authentication
NewProject string
// CrawlProject uses saved session data while crawling
CrawlProject string
// ListProject lists all stored projects
ListProject bool
}

func (options *Options) ParseCustomHeaders() map[string]string {
Expand Down
2 changes: 1 addition & 1 deletion pkg/utils/queue/queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ func (v *VarietyQueue) Pop() interface{} {
v.mutex.Lock()
defer v.mutex.Unlock()

var x interface{}
var x interface{} = nil
if v.queueType == BreadthFirst {
x = v.priorityQueue.Pop()
} else if v.queueType == DepthFirst {
Expand Down