Skip to content

Commit

Permalink
Merge pull request #66 from karthikuj/60-feature-add-multi-browser-support
Browse files Browse the repository at this point in the history

Adds multi browser support
  • Loading branch information
karthikuj authored Jul 13, 2024
2 parents dfefe30 + 300dafa commit 87c6ec9
Show file tree
Hide file tree
Showing 7 changed files with 248 additions and 184 deletions.
23 changes: 16 additions & 7 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
# Changelog

All notable changes to this project will be documented in this file.

## [0.1.0] - 2024-03-30

### Added
# Changelog

All notable changes to this project will be documented in this file.

## Unreleased

### Added
- Multi-browser support
- Wait till `document.readyState` is equal to `complete`.

### Fixed
- `CrawlAction` element name bug.
- Check whether a `CrawlAction` is unique only if it is not a link.

## [0.1.0] - 2024-03-30
### Added
- Initial release of the project.
83 changes: 42 additions & 41 deletions config/config.json
Original file line number Diff line number Diff line change
@@ -1,42 +1,43 @@
{
"browser": {
"headless": false,
"maximize": true,
"proxy": {
"enabled": false,
"host": "127.0.0.1",
"port": 8080
}
},
"crawler": {
"entryPoint": "https://security-crawl-maze.app/",
"eventTimeout": 10000,
"navigationTimeout": 30000,
"eventWait": 1000,
"maxDuration": 0,
"elements": [
"a",
"button",
"input[type=\"submit\"]"
],
"maxChildren": 0,
"maxDepth": 10,
"authentication": {
"basicAuth": {
"enabled": false,
"username": "username",
"password": "password"
},
"recorderAuth": {
"enabled": false,
"pptrRecording": "/path/to/pptrRecording.json"
}
},
"includeRegexes": [
"https?://security-crawl-maze.app(?:/.*|)"
],
"excludeRegexes": [
".*logout.*"
]
}
{
"browser": {
"headless": false,
"maximize": true,
"proxy": {
"enabled": false,
"host": "127.0.0.1",
"port": 8080
},
"instances": 4
},
"crawler": {
"entryPoint": "https://security-crawl-maze.app/",
"eventTimeout": 10000,
"navigationTimeout": 30000,
"eventWait": 0,
"maxDuration": 0,
"elements": [
"a",
"button",
"input[type=\"submit\"]"
],
"maxChildren": 0,
"maxDepth": 10,
"authentication": {
"basicAuth": {
"enabled": false,
"username": "username",
"password": "password"
},
"recorderAuth": {
"enabled": false,
"pptrRecording": "/path/to/login/recording"
}
},
"includeRegexes": [
"https?://security-crawl-maze.app(?:/.*|)"
],
"excludeRegexes": [
".*logout.*"
]
}
}
99 changes: 50 additions & 49 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,49 +1,50 @@
{
"name": "sasori-crawl",
"version": "0.2.0",
"description": "Sasori is a dynamic web crawler powered by Puppeteer, designed for lightning-fast endpoint discovery.",
"main": "bin/index.js",
"author": "Karthik UJ",
"contributors": [
{
"name": "Karthik UJ",
"email": "[email protected]",
"url": "https://www.5up3r541y4n.tech/"
}
],
"license": "MIT",
"repository": {
"type": "git",
"url": "git+https://github.com/karthikuj/sasori.git"
},
"bin": {
"sasori": "bin/index.js"
},
"scripts": {
"start": "node ."
},
"keywords": [
"crawler",
"crawling",
"scraping",
"endpoint-discovery",
"puppeteer",
"dynamic",
"automation",
"security",
"dast",
"infosec"
],
"dependencies": {
"@puppeteer/replay": "^2.13.4",
"chalk": "^4",
"cheerio": "^1.0.0-rc.12",
"joi": "^17.12.2",
"puppeteer": "^21.5.1",
"yargs": "^17.7.2"
},
"devDependencies": {
"eslint": "^8.57.0",
"eslint-config-google": "^0.14.0"
}
}
{
"name": "sasori-crawl",
"version": "1.0.0",
"description": "Sasori is a dynamic web crawler powered by Puppeteer, designed for lightning-fast endpoint discovery.",
"main": "bin/index.js",
"author": "Karthik UJ",
"contributors": [
{
"name": "Karthik UJ",
"email": "[email protected]",
"url": "https://www.5up3r541y4n.tech/"
}
],
"license": "MIT",
"repository": {
"type": "git",
"url": "git+https://github.com/karthikuj/sasori.git"
},
"bin": {
"sasori": "bin/index.js"
},
"scripts": {
"start": "node .",
"lint": "npx eslint . --fix"
},
"keywords": [
"crawler",
"crawling",
"scraping",
"endpoint-discovery",
"puppeteer",
"dynamic",
"automation",
"security",
"dast",
"infosec"
],
"dependencies": {
"@puppeteer/replay": "^2.13.4",
"chalk": "^4",
"cheerio": "^1.0.0-rc.12",
"joi": "^17.12.2",
"puppeteer": "^21.5.1",
"yargs": "^17.7.2"
},
"devDependencies": {
"eslint": "^8.57.0",
"eslint-config-google": "^0.14.0"
}
}
2 changes: 1 addition & 1 deletion src/crawler/crawlAction.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ const {randomUUID} = require('crypto');
*/
class CrawlAction {
static {
this.ANCHOR = 'a';
this.ANCHOR = 'A';
}

/**
Expand Down
6 changes: 5 additions & 1 deletion src/crawler/crawlStateManager.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ class CrawlStateManager {
*/
constructor(rootState) {
// Any falsy rootState (including an omitted argument) is stored as null.
this.rootState = rootState ? rootState : null;
// Tracks the actionId of each crawl action already handed out, so that the
// traversal below returns a given action only once.
this.visitedActions = new Set();
}

/**
Expand Down Expand Up @@ -73,7 +74,10 @@ class CrawlStateManager {
if (childState) {
stack.push(...childState.getCrawlActions());
} else {
return currentAction;
if (!this.visitedActions.has(currentAction.actionId)) {
this.visitedActions.add(currentAction.actionId);
return currentAction;
}
}
}
}
Expand Down
Loading

0 comments on commit 87c6ec9

Please sign in to comment.