From cfa920b7fff920e5bac1c5b9c8f922aa0c56b27b Mon Sep 17 00:00:00 2001 From: Tarun Koyalwar Date: Tue, 14 Feb 2023 00:48:51 +0530 Subject: [PATCH 1/2] adds auth crawl support --- cmd/katana/main.go | 7 ++- internal/runner/executer.go | 94 +++++++++++++++++++++++++++++++++++++ internal/runner/options.go | 37 +++++++++++++-- internal/runner/runner.go | 17 +++++++ pkg/engine/hybrid/hybrid.go | 33 ++++++++----- pkg/types/options.go | 8 +++- pkg/utils/queue/queue.go | 2 +- 7 files changed, 178 insertions(+), 20 deletions(-) diff --git a/cmd/katana/main.go b/cmd/katana/main.go index 86d42cae..f7c73d79 100644 --- a/cmd/katana/main.go +++ b/cmd/katana/main.go @@ -95,11 +95,16 @@ pipelines offering both headless and non-headless crawling.`) flagSet.BoolVarP(&options.ShowBrowser, "show-browser", "sb", false, "show the browser on the screen with headless mode"), flagSet.StringSliceVarP(&options.HeadlessOptionalArguments, "headless-options", "ho", nil, "start headless chrome with additional options", goflags.FileCommaSeparatedStringSliceOptions), flagSet.BoolVarP(&options.HeadlessNoSandbox, "no-sandbox", "nos", false, "start headless chrome in --no-sandbox mode"), - flagSet.StringVarP(&options.ChromeDataDir, "chrome-data-dir", "cdd", "", "path to store chrome browser data"), flagSet.StringVarP(&options.SystemChromePath, "system-chrome-path", "scp", "", "use specified chrome browser for headless crawling"), flagSet.BoolVarP(&options.HeadlessNoIncognito, "no-incognito", "noi", false, "start headless chrome without incognito mode"), ) + flagSet.CreateGroup("project", "Project", + flagSet.StringVarP(&options.NewProject, "new-project", "np", "", "creates new project and opens browser for manual authentication"), + flagSet.StringVarP(&options.CrawlProject, "crawl-project", "cp", "", "use project data while crawling (uses saved auth/session data)"), + flagSet.BoolVarP(&options.ListProject, "list-project", "lp", false, "list all stored projects"), + ) + flagSet.CreateGroup("scope", 
"Scope", flagSet.StringSliceVarP(&options.Scope, "crawl-scope", "cs", nil, "in scope url regex to be followed by crawler", goflags.FileCommaSeparatedStringSliceOptions), flagSet.StringSliceVarP(&options.OutOfScope, "crawl-out-scope", "cos", nil, "out of scope url regex to be excluded by crawler", goflags.FileCommaSeparatedStringSliceOptions), diff --git a/internal/runner/executer.go b/internal/runner/executer.go index b6acddcc..14497976 100644 --- a/internal/runner/executer.go +++ b/internal/runner/executer.go @@ -1,6 +1,16 @@ package runner import ( + "fmt" + "log" + "net" + "net/http" + "net/url" + "os" + "strconv" + + "github.com/go-rod/rod/lib/launcher" + "github.com/go-rod/rod/lib/utils" "github.com/projectdiscovery/gologger" errorutil "github.com/projectdiscovery/utils/errors" "github.com/remeh/sizedwaitgroup" @@ -8,6 +18,11 @@ import ( // ExecuteCrawling executes the crawling main loop func (r *Runner) ExecuteCrawling() error { + if r.options.NewProject != "" { + r.setupNewProject() + os.Exit(0) + } + inputs := r.parseInputs() if len(inputs) == 0 { return errorutil.New("no input provided for crawling") @@ -30,3 +45,82 @@ func (r *Runner) ExecuteCrawling() error { wg.Wait() return nil } + +// setupNewProject opens browser for manual authentication +func (r *Runner) setupNewProject() { + // create manager instance which manages browser + manager := launcher.NewManager() + + // setup manager port + // get a random port without preference + listener, err := net.Listen("tcp", ":0") + if err != nil { + gologger.Fatal().Label("project").Msgf("failed to setup listener for manager got %v", err) + } + managerPort := 9000 + if value, ok := (listener.Addr()).(*net.TCPAddr); ok { + managerPort = value.Port + } + + //start manager goroutine + go func() { + log.Fatal(http.Serve(listener, manager)) + }() + + //open browser + go func() { + chromeLauncher, err := launcher.NewManaged("ws://127.0.0.1:" + strconv.Itoa(managerPort)) + if err != nil { + panic(err) + } + 
chromeLauncher. + Leakless(true). + Set("disable-gpu", "true"). + Set("ignore-certificate-errors", "true"). + Set("ignore-certificate-errors", "1"). + Set("disable-crash-reporter", "true"). + Set("disable-notifications", "true"). + Set("hide-scrollbars", "true"). + Set("window-size", fmt.Sprintf("%d,%d", 1080, 1920)). + Set("mute-audio", "true"). + Delete("use-mock-keychain"). + UserDataDir(r.options.NewProject). + KeepUserDataDir(). + Headless(false) + + if r.options.UseInstalledChrome { + if chromePath, hasChrome := launcher.LookPath(); hasChrome { + chromeLauncher.Bin(chromePath) + } else { + gologger.Fatal().Label("project").Msgf("chrome browser is not installed") + } + } + if r.options.SystemChromePath != "" { + chromeLauncher.Bin(r.options.SystemChromePath) + } + if r.options.HeadlessNoSandbox { + chromeLauncher.Set("no-sandbox", "true") + } + if r.options.Proxy != "" && r.options.Headless { + proxyURL, err := url.Parse(r.options.Proxy) + if err != nil { + gologger.Fatal().Label("project").Msgf("failed to parse proxy url got %v", err) + } + chromeLauncher.Set("proxy-server", proxyURL.String()) + } + + if _, err := chromeLauncher.Launch(); err != nil { + gologger.Fatal().Label("project").Msgf("failed to launch chromium got %v", err) + } + + utils.Pause() + }() + fmt.Println("Started katana in New Project Mode. Follow below steps to complete creating new project") + fmt.Println("1. You should now see a chromium window, if not locate it") + fmt.Println("2. Login to your desired target in browser") + fmt.Println("3. 
[Press Enter Key] to complete setup") + + // read one char from stdin + fmt.Scanln() + gologger.Verbose().Msgf("new project setup completed") +} diff --git a/internal/runner/options.go b/internal/runner/options.go index e72e1a3d..9c1f692b 100644 --- a/internal/runner/options.go +++ b/internal/runner/options.go @@ -25,9 +25,6 @@ func validateOptions(options *types.Options) error { if options.Verbose { gologger.DefaultLogger.SetMaxLevel(levels.LevelVerbose) } - if len(options.URLs) == 0 && !fileutil.HasStdin() { - return errorutil.New("no inputs specified for crawler") - } if (options.HeadlessOptionalArguments != nil || options.HeadlessNoSandbox || options.SystemChromePath != "") && !options.Headless { return errorutil.New("headless mode (-hl) is required if -ho, -nos or -scp are set") } @@ -36,6 +33,29 @@ func validateOptions(options *types.Options) error { return errorutil.New("specified system chrome binary does not exist") } } + if options.NewProject != "" && !filepath.IsAbs(options.NewProject) { + if _, err := os.Stat(getDefaultProjectDir()); err != nil { + // if default save directory does not exist create + if err := os.Mkdir(getDefaultProjectDir(), 0777); err != nil { //nolint + gologger.Fatal().Msgf("failed to create default root directory for katana got %v", err) + } + } + options.NewProject = filepath.Join(getDefaultProjectDir(), options.NewProject) + gologger.Verbose().Msgf("new project created at %v", options.NewProject) + } + if options.CrawlProject != "" { + if options.NewProject != "" { + gologger.Fatal().Msg("cannot create and crawl project at same time") + } + if !filepath.IsAbs(options.CrawlProject) { + // if not absolute path prepend default project directory + options.CrawlProject = filepath.Join(getDefaultProjectDir(), options.CrawlProject) + } + // check if project exists + if _, err := os.Stat(options.CrawlProject); err != nil { + gologger.Fatal().Msgf("project %v does not exist, try creating new project with `-np` flag") + } + } if 
options.StoreResponseDir != "" && !options.StoreResponse { gologger.Debug().Msgf("store response directory specified, enabling \"sr\" flag automatically\n") options.StoreResponse = true @@ -43,6 +63,9 @@ func validateOptions(options *types.Options) error { if options.Headless && (options.StoreResponse || options.StoreResponseDir != "") { return errorutil.New("store responses feature is not supported in headless mode") } + if len(options.URLs) == 0 && !fileutil.HasStdin() && options.NewProject == "" && !options.ListProject { + return errorutil.New("no inputs specified for crawler") + } gologger.DefaultLogger.SetFormatter(formatter.NewCLI(options.NoColors)) return nil } @@ -123,3 +146,11 @@ func initExampleFormFillConfig() error { err = yaml.NewEncoder(exampleConfig).Encode(utils.DefaultFormFillData) return err } + +func getDefaultProjectDir() string { + homedir, err := os.UserHomeDir() + if err != nil { + gologger.Fatal().Msgf("failed to fetch user home directory got %v", err) + } + return filepath.Join(homedir, ".katana") +} diff --git a/internal/runner/runner.go b/internal/runner/runner.go index 3561c271..e255a4fb 100644 --- a/internal/runner/runner.go +++ b/internal/runner/runner.go @@ -1,6 +1,9 @@ package runner import ( + "fmt" + "os" + "github.com/projectdiscovery/gologger" "github.com/projectdiscovery/katana/pkg/engine" "github.com/projectdiscovery/katana/pkg/engine/hybrid" @@ -41,6 +44,20 @@ func New(options *types.Options) (*Runner, error) { return nil, err } } + if options.ListProject { + gologger.Info().Msg("katana saved projects:") + saveDir, err := os.ReadDir(getDefaultProjectDir()) + if err != nil { + gologger.Fatal().Msgf("saved projects not found got %v", err) + } + for _, v := range saveDir { + if v.IsDir() { + fmt.Println(v.Name()) + } + } + os.Exit(0) + } + crawlerOptions, err := types.NewCrawlerOptions(options) if err != nil { return nil, errorutil.NewWithErr(err).Msgf("could not create crawler options") diff --git a/pkg/engine/hybrid/hybrid.go 
b/pkg/engine/hybrid/hybrid.go index fb371793..e31ca49f 100644 --- a/pkg/engine/hybrid/hybrid.go +++ b/pkg/engine/hybrid/hybrid.go @@ -43,19 +43,17 @@ type Crawler struct { // New returns a new standard crawler instance func New(options *types.CrawlerOptions) (*Crawler, error) { - var dataStore string - var err error - if options.Options.ChromeDataDir != "" { - dataStore = options.Options.ChromeDataDir - } else { - dataStore, err = os.MkdirTemp("", "katana-*") + previousPIDs := findChromeProcesses() + var tempDir string = "" // empty if crawlproject is given + + if options.Options.CrawlProject == "" { + var err error + tempDir, err = os.MkdirTemp("", "katana-*") if err != nil { return nil, errorutil.NewWithTag("hybrid", "could not create temporary directory").Wrap(err) } } - previousPIDs := findChromeProcesses() - chromeLauncher := launcher.New(). Leakless(false). Set("disable-gpu", "true"). @@ -66,8 +64,13 @@ func New(options *types.CrawlerOptions) (*Crawler, error) { Set("hide-scrollbars", "true"). Set("window-size", fmt.Sprintf("%d,%d", 1080, 1920)). Set("mute-audio", "true"). - Delete("use-mock-keychain"). 
- UserDataDir(dataStore) + Delete("use-mock-keychain") + + if options.Options.CrawlProject != "" { + chromeLauncher.UserDataDir(options.Options.CrawlProject) + } else { + chromeLauncher.UserDataDir(tempDir) + } if options.Options.UseInstalledChrome { if chromePath, hasChrome := launcher.LookPath(); hasChrome { @@ -117,7 +120,7 @@ func New(options *types.CrawlerOptions) (*Crawler, error) { options: options, browser: browser, previousPIDs: previousPIDs, - tempDir: dataStore, + tempDir: tempDir, } if options.Options.KnownFiles != "" { httpclient, _, err := common.BuildClient(options.Dialer, options.Options, nil) @@ -134,7 +137,7 @@ func (c *Crawler) Close() error { if err := c.browser.Close(); err != nil { return err } - if c.options.Options.ChromeDataDir == "" { + if c.tempDir != "" { if err := os.RemoveAll(c.tempDir); err != nil { return err } @@ -179,7 +182,7 @@ func (c *Crawler) Crawl(rootURL string) error { // create a new browser instance (default to incognito mode) var newBrowser *rod.Browser - if c.options.Options.HeadlessNoIncognito { + if c.options.Options.HeadlessNoIncognito || c.options.Options.CrawlProject != "" { if err := c.browser.Connect(); err != nil { return err } @@ -203,6 +206,10 @@ func (c *Crawler) Crawl(rootURL string) error { break } item := queue.Pop() + if item == nil { + // if no elements are present in queue + break + } req, ok := item.(navigation.Request) if !ok { continue diff --git a/pkg/types/options.go b/pkg/types/options.go index 1b5d7e07..e9ec033c 100644 --- a/pkg/types/options.go +++ b/pkg/types/options.go @@ -97,14 +97,18 @@ type Options struct { StoreResponse bool // StoreResponseDir specifies if katana should use a custom directory to store http requests/responses StoreResponseDir string - // ChromeDataDir : Specify the --user-data-dir to chrome binary to preserve sessions - ChromeDataDir string // HeadlessNoIncognito specifies if chrome should be started without incognito mode HeadlessNoIncognito bool // HealthCheck determines 
if a self-healthcheck should be performed HealthCheck bool // ErrorLogFile specifies a file to write with the errors of all requests ErrorLogFile string + // NewProject creates a new project and opens a visible browser window for manual login + NewProject string + // CrawlProject uses saved session data while crawling + CrawlProject string + // ListProject lists all stored projects + ListProject bool } func (options *Options) ParseCustomHeaders() map[string]string { diff --git a/pkg/utils/queue/queue.go b/pkg/utils/queue/queue.go index e0f870e6..c9118169 100644 --- a/pkg/utils/queue/queue.go +++ b/pkg/utils/queue/queue.go @@ -86,7 +86,7 @@ func (v *VarietyQueue) Pop() interface{} { v.mutex.Lock() defer v.mutex.Unlock() - var x interface{} + var x interface{} = nil if v.queueType == BreadthFirst { x = v.priorityQueue.Pop() } else if v.queueType == DepthFirst { From a1acc6ab8b82b275f6b3edb09720e93768b21edb Mon Sep 17 00:00:00 2001 From: Tarun Koyalwar Date: Tue, 14 Feb 2023 00:53:09 +0530 Subject: [PATCH 2/2] fix missing arg while printing --- internal/runner/options.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/runner/options.go b/internal/runner/options.go index 9c1f692b..e168dd46 100644 --- a/internal/runner/options.go +++ b/internal/runner/options.go @@ -53,7 +53,7 @@ func validateOptions(options *types.Options) error { } // check if project exists if _, err := os.Stat(options.CrawlProject); err != nil { - gologger.Fatal().Msgf("project %v does not exist, try creating new project with `-np` flag") + gologger.Fatal().Msgf("project %v does not exist, try creating new project with `-np` flag", err) } } if options.StoreResponseDir != "" && !options.StoreResponse {