Skip to content

Commit

Permalink
Merge branch 'master' into patch-1
Browse files (browse the repository at this point in the history)
  • Loading branch information
JayBizzle authored May 24, 2021
2 parents f647ecf + a42972f commit 330a257
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 1 deletion.
2 changes: 1 addition & 1 deletion raw/Crawlers.json

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions raw/Crawlers.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,29 @@
^DangDang
^DavClnt
^DHSH
^Expanse
^FDM
^git\/
^Goose\/
^Grabber
^HTTPClient\/
^HTTPing
^Java\/
^Jeode\/
^Jetty\/
^Mail\/
^Mget
^Microsoft URL Control
^Mikrotik\/
^Netlab360
^NG\/[0-9\.]
^NING\/
^Nuclei
^PHP\/
^RMA\/
^Ruby|Ruby\/[0-9]
^Swurl
^TLS tester
^VSE\/[0-9]
^WordPress\.com
^XRL\/[0-9]
Expand Down Expand Up @@ -208,6 +213,7 @@ chkme\.com
Chlooe
Chromaxa
CirrusExplorer
CISPA Web Analyser
CISPA Vulnerability Notification
Citoid
CJNetworkQuality
Expand Down Expand Up @@ -412,6 +418,7 @@ Go [\d\.]* package http
Go http package
Go-Ahead-Got-It
Go-http-client
go-mtasts\/
Go!Zilla
gobyus
Gofeed
Expand All @@ -436,6 +443,7 @@ Google-Apps-Script
Google-Calendar-Importer
Google-HotelAdsVerifier
Google-HTTP-Java-Client
Google-SMTP-STS
Google-Publisher-Plugin
Google-Read-Aloud
Google-SearchByImage
Expand Down Expand Up @@ -475,6 +483,7 @@ Haansoft
hackney\/
Hadi Agent
HappyApps-WebCheck
Hardenize
Hatena
Havij
HaxerMen
Expand Down Expand Up @@ -600,6 +609,7 @@ JolokiaPwn
Joomla
Jorgee
JS-Kit
JungleKeyThumbnail
JustView
Kaspersky Lab CFR link resolver
Kelny\/
Expand All @@ -621,6 +631,7 @@ KumKie
L\.webis
Larbin
Lavf\/
leakix\.net
LeechFTP
LeechGet
letsencrypt
Expand Down Expand Up @@ -671,6 +682,7 @@ MacOutlook\/
Mag-Net
MagpieRSS
Mail\.Ru
Mail::STS
MailChimp
Majestic12
makecontact\/
Expand Down Expand Up @@ -828,6 +840,7 @@ PageFreezer
PageGrabber
PagePeeker
PageScorer
PageThing
Pagespeed\/
Panopta
panscient
Expand Down Expand Up @@ -878,6 +891,7 @@ Pompos
Porkbun
Port Monitor
postano
postfix-mta-sts-resolver
PostmanRuntime
postplanner\.com
PostPost
Expand Down Expand Up @@ -912,6 +926,7 @@ Qseero
Qualidator
QueryN Metasearch
queuedriver
quic-go-HTTP\/
QuiteRSS
Quora Link Preview
Qwantify
Expand Down Expand Up @@ -1252,6 +1267,7 @@ WebSniffer
Webster
WebStripper
WebSucker
webtech\/
Webthumb\/
WebThumbnail
WebWhacker
Expand Down
16 changes: 16 additions & 0 deletions src/Fixtures/Crawlers.php
Original file line number Diff line number Diff line change
Expand Up @@ -28,24 +28,29 @@ class Crawlers extends AbstractProvider
'^DangDang',
'^DavClnt',
'^DHSH',
'^Expanse',
'^FDM ',
'^git\/',
'^Goose\/',
'^Grabber',
'^HTTPClient\/',
'^HTTPing',
'^Java\/',
'^Jeode\/',
'^Jetty\/',
'^Mail\/',
'^Mget',
'^Microsoft URL Control',
'^Mikrotik\/',
'^Netlab360',
'^NG\/[0-9\.]',
'^NING\/',
'^Nuclei',
'^PHP\/',
'^RMA\/',
'^Ruby|Ruby\/[0-9]',
'^Swurl ',
'^TLS tester ',
'^VSE\/[0-9]',
'^WordPress\.com',
'^XRL\/[0-9]',
Expand Down Expand Up @@ -229,6 +234,7 @@ class Crawlers extends AbstractProvider
'Chlooe',
'Chromaxa',
'CirrusExplorer',
'CISPA Web Analyser',
'CISPA Vulnerability Notification',
'Citoid',
'CJNetworkQuality',
Expand Down Expand Up @@ -433,6 +439,7 @@ class Crawlers extends AbstractProvider
'Go http package',
'Go-Ahead-Got-It',
'Go-http-client',
'go-mtasts\/',
'Go!Zilla',
'gobyus',
'Gofeed',
Expand All @@ -457,6 +464,7 @@ class Crawlers extends AbstractProvider
'Google-Calendar-Importer',
'Google-HotelAdsVerifier',
'Google-HTTP-Java-Client',
'Google-SMTP-STS',
'Google-Publisher-Plugin',
'Google-Read-Aloud',
'Google-SearchByImage',
Expand Down Expand Up @@ -496,6 +504,7 @@ class Crawlers extends AbstractProvider
'hackney\/',
'Hadi Agent',
'HappyApps-WebCheck',
'Hardenize',
'Hatena',
'Havij',
'HaxerMen',
Expand Down Expand Up @@ -621,6 +630,7 @@ class Crawlers extends AbstractProvider
'Joomla',
'Jorgee',
'JS-Kit',
'JungleKeyThumbnail',
'JustView',
'Kaspersky Lab CFR link resolver',
'Kelny\/',
Expand All @@ -642,6 +652,7 @@ class Crawlers extends AbstractProvider
'L\.webis',
'Larbin',
'Lavf\/',
'leakix\.net',
'LeechFTP',
'LeechGet',
'letsencrypt',
Expand Down Expand Up @@ -692,6 +703,7 @@ class Crawlers extends AbstractProvider
'Mag-Net',
'MagpieRSS',
'Mail\.Ru',
'Mail::STS',
'MailChimp',
'Majestic12',
'makecontact\/',
Expand Down Expand Up @@ -849,6 +861,7 @@ class Crawlers extends AbstractProvider
'PageGrabber',
'PagePeeker',
'PageScorer',
'PageThing',
'Pagespeed\/',
'Panopta',
'panscient',
Expand Down Expand Up @@ -899,6 +912,7 @@ class Crawlers extends AbstractProvider
'Porkbun',
'Port Monitor',
'postano',
'postfix-mta-sts-resolver',
'PostmanRuntime',
'postplanner\.com',
'PostPost',
Expand Down Expand Up @@ -933,6 +947,7 @@ class Crawlers extends AbstractProvider
'Qualidator',
'QueryN Metasearch',
'queuedriver',
'quic-go-HTTP\/',
'QuiteRSS',
'Quora Link Preview',
'Qwantify',
Expand Down Expand Up @@ -1273,6 +1288,7 @@ class Crawlers extends AbstractProvider
'Webster',
'WebStripper',
'WebSucker',
'webtech\/',
'Webthumb\/',
'WebThumbnail',
'WebWhacker',
Expand Down
23 changes: 23 additions & 0 deletions tests/crawlers.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3592,3 +3592,26 @@ Pleroma 2.1.50-495-gf9ece1a7-develop+dev; https://search.fedi.app <pleroma@searc
SmerBot/0.1 ([email protected]) Language Model Dataset Fetcher
Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/71.0.3578.98 Safari/537.36 Prerender (+https://github.com/prerender/prerender)
Bitrix link preview
Mikrotik/6.x Fetch
Mikrotik/7.x Fetch
HTTPing v2.5
Mozilla/5.0 (compatible; JungleKeyThumbnail/1.1; +http://www.junglekey.fr/)
postfix-mta-sts-resolver
go-mtasts/1.0
Mail::STS
Google-SMTP-STS
Netlab360
TLS tester from https://testssl.sh/dev/
TBI-HttpOpenPlugin/0.1.0 (+https://leakix.net/)
TBI-WebScanner/0.0.1 (+https://leakix.net/)
quic-go-HTTP/3
CISPA Web Analyser (https://notify.cispa.saarland)
webtech/1.2.11
Hardenize (https://www.hardenize.com)
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:46.0) Gecko/20100101 Firefox/46.0 | Hardenize (https://www.hardenize.com)
Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:46.0) Gecko/20100101 Firefox/46.0 | Hardenize/v1.1196.1 (https://www.hardenize.com)
Expanse indexes the network perimeters of our customers. If you have any questions or concerns, please reach out to: scaninfo@expanseinc.com
Expanse, a Palo Alto Networks company, searches across the global IPv4 space multiple times per day to identify customers&#39; presences on the Internet. If you would like to be excluded from our scans, please send IP addresses/domains to: scaninfo@paloaltonetworks.com
PageThing http://pagething.com
PageThing.com
Mozilla/5.0 (Windows; U; Windows NT 5.1; pl; PageThing http://pagething.com); rv:1.9; Gecko/2008052906 Firefox/3.0

0 comments on commit 330a257

Please sign in to comment.