From 3b30b55a6fff4e5c233105a2f3cf6ddad02af387 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Fri, 2 Aug 2024 11:20:29 -0400
Subject: [PATCH 01/24] fix Dastardly error

---
 README.md                        | 4 +++-
 bbot/modules/deadly/dastardly.py | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 65656ea03..d3281cb5c 100644
--- a/README.md
+++ b/README.md
@@ -330,6 +330,8 @@ For details, see [Configuration](https://www.blacklanternsecurity.com/bbot/scann
 
 ## Documentation
 
+Also see: [Bleeding-Edge Documentation (`dev` branch)](https://www.blacklanternsecurity.com/bbot/dev_branch)
+
 - **User Manual**
     - **Basics**
@@ -393,7 +395,7 @@ Thanks to these amazing people for contributing to BBOT! :heart:
 
 Special thanks to:
 
-- @TheTechromancer for creating [BBOT](https://github.com/blacklanternsecurity/bbot)
+- @TheTechromancer for creating BBOT
 - @liquidsec for his extensive work on BBOT's web hacking features, including [badsecrets](https://github.com/blacklanternsecurity/badsecrets) and [baddns](https://github.com/blacklanternsecurity/baddns)
 - Steve Micallef (@smicallef) for creating Spiderfoot
 - @kerrymilan for his Neo4j and Ansible expertise
diff --git a/bbot/modules/deadly/dastardly.py b/bbot/modules/deadly/dastardly.py
index 4476b99ab..c2551e489 100644
--- a/bbot/modules/deadly/dastardly.py
+++ b/bbot/modules/deadly/dastardly.py
@@ -94,7 +94,9 @@ def parse_dastardly_xml(self, xml_file):
             for testsuite in et.iter("testsuite"):
                 yield TestSuite(testsuite)
         except FileNotFoundError:
-            pass
+            self.debug(f'Could not find Dastardly XML file at {xml_file}')
+        except OSError as e:
+            self.verbose(f'Error opening Dastardly XML file at {xml_file}: {e}')
         except etree.ParseError as e:
             self.warning(f"Error parsing Dastardly XML at {xml_file}: {e}")

From 1ea03fd107d07c54c4c9647ec55f49a4cc1eea2b Mon Sep 17 00:00:00 2001
From: github-actions
Date: Fri, 2 Aug 2024 11:20:41 -0400
Subject: [PATCH 02/24] blacked

---
 bbot/modules/deadly/dastardly.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bbot/modules/deadly/dastardly.py b/bbot/modules/deadly/dastardly.py
index c2551e489..04fb5313f 100644
--- a/bbot/modules/deadly/dastardly.py
+++ b/bbot/modules/deadly/dastardly.py
@@ -94,9 +94,9 @@ def parse_dastardly_xml(self, xml_file):
             for testsuite in et.iter("testsuite"):
                 yield TestSuite(testsuite)
         except FileNotFoundError:
-            self.debug(f'Could not find Dastardly XML file at {xml_file}')
+            self.debug(f"Could not find Dastardly XML file at {xml_file}")
         except OSError as e:
-            self.verbose(f'Error opening Dastardly XML file at {xml_file}: {e}')
+            self.verbose(f"Error opening Dastardly XML file at {xml_file}: {e}")
         except etree.ParseError as e:
             self.warning(f"Error parsing Dastardly XML at {xml_file}: {e}")

From cda85df0927872543b606b5db1700e887c97e07d Mon Sep 17 00:00:00 2001
From: github-actions
Date: Fri, 2 Aug 2024 15:18:34 -0400
Subject: [PATCH 03/24] add timeout for resolve_raw_batch()

---
 bbot/core/engine.py             | 15 +++++++++++++--
 bbot/core/helpers/dns/engine.py |  4 ++--
 bbot/core/helpers/web/engine.py |  2 +-
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/bbot/core/engine.py b/bbot/core/engine.py
index 52d4b871a..cde424b6a 100644
--- a/bbot/core/engine.py
+++ b/bbot/core/engine.py
@@ -537,9 +537,20 @@ def new_child_task(self, client_id, coro):
             self.child_tasks[client_id] = {task}
         return task
 
-    async def finished_tasks(self, client_id):
+    async def finished_tasks(self, client_id, timeout=None):
         child_tasks = self.child_tasks.get(client_id, set())
-        done, pending = await asyncio.wait(child_tasks, return_when=asyncio.FIRST_COMPLETED)
+        try:
+            done, pending = await asyncio.wait(child_tasks, return_when=asyncio.FIRST_COMPLETED, timeout=timeout)
+        except BaseException as e:
+            if isinstance(e, (TimeoutError, asyncio.TimeoutError)):
+                done = set()
+                self.log.warning(f"{self.name}: Timeout after {timeout:,} seconds in finished_tasks({child_tasks})")
+                for task in child_tasks:
+                    task.cancel()
+            else:
+                self.log.error(f"{self.name}: Unhandled exception in finished_tasks({child_tasks}): {e}")
+                self.log.trace(traceback.format_exc())
+                raise
         self.child_tasks[client_id] = pending
         return done
 
diff --git a/bbot/core/helpers/dns/engine.py b/bbot/core/helpers/dns/engine.py
index 91efca10d..16be33c35 100644
--- a/bbot/core/helpers/dns/engine.py
+++ b/bbot/core/helpers/dns/engine.py
@@ -361,7 +361,7 @@ def new_task(query):
 
         while tasks:  # While there are tasks pending
             # Wait for the first task to complete
-            finished = await self.finished_tasks(client_id)
+            finished = await self.finished_tasks(client_id, timeout=120)
 
             for task in finished:
                 results = task.result()
@@ -388,7 +388,7 @@ def new_task(query, rdtype):
 
         while tasks:  # While there are tasks pending
             # Wait for the first task to complete
-            finished = await self.finished_tasks(client_id)
+            finished = self.finished_tasks(client_id, timeout=120)
 
             for task in finished:
                 answers, errors = task.result()
diff --git a/bbot/core/helpers/web/engine.py b/bbot/core/helpers/web/engine.py
index bc58057ed..30e037e6c 100644
--- a/bbot/core/helpers/web/engine.py
+++ b/bbot/core/helpers/web/engine.py
@@ -100,7 +100,7 @@ def new_task():
 
         while tasks:  # While there are tasks pending
             # Wait for the first task to complete
-            finished = await self.finished_tasks(client_id)
+            finished = await self.finished_tasks(client_id, timeout=120)
 
             for task in finished:
                 response = task.result()

From cbe4746def01f73d1ab420d47d6277ee09f22921 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Fri, 2 Aug 2024 15:22:10 -0400
Subject: [PATCH 04/24] fix error

---
 bbot/core/helpers/dns/engine.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bbot/core/helpers/dns/engine.py b/bbot/core/helpers/dns/engine.py
index 16be33c35..1d7e87229 100644
--- a/bbot/core/helpers/dns/engine.py
+++ b/bbot/core/helpers/dns/engine.py
@@ -388,7 +388,7 @@ def new_task(query, rdtype):
 
         while tasks:  # While there are tasks pending
             # Wait for the first task to complete
-            finished = self.finished_tasks(client_id, timeout=120)
+            finished = await self.finished_tasks(client_id, timeout=120)
 
             for task in finished:
                 answers, errors = task.result()

From 959b916746fa51bcca6fe322ac1974782ae2effb Mon Sep 17 00:00:00 2001
From: github-actions
Date: Fri, 2 Aug 2024 19:20:11 -0400
Subject: [PATCH 05/24] more engine debugging

---
 bbot/core/engine.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bbot/core/engine.py b/bbot/core/engine.py
index cde424b6a..e49b1ecfd 100644
--- a/bbot/core/engine.py
+++ b/bbot/core/engine.py
@@ -417,6 +417,7 @@ async def run_and_yield(self, client_id, command_fn, *args, **kwargs):
         self.log.debug(f"{self.name} run-and-yield {fn_str}")
         try:
             async for _ in command_fn(*args, **kwargs):
+                self.log.debug(f"{self.name}: sending iteration for {command_fn.__name__}(): {_}")
                 await self.send_socket_multipart(client_id, _)
         except BaseException as e:
             error = f"Error in {self.name}.{fn_str}: {e}"
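Note on the worker loop that patches 03-04 feed into: `finished_tasks()` drains a batch of child tasks, and `asyncio.wait()` itself does not raise on timeout; it simply returns with an empty `done` set, so timeout handling has to live around the wait. A minimal standalone sketch of the same loop shape (the `do_query` coroutine here is a hypothetical stand-in for the real DNS/web callbacks):

```python
import asyncio

async def do_query(q):
    # stand-in for a real DNS or HTTP lookup
    await asyncio.sleep(0.1)
    return q.upper()

async def drain(queries, timeout=120):
    tasks = {asyncio.create_task(do_query(q)) for q in queries}
    while tasks:  # while there are tasks pending
        # wait for the first task to complete; on timeout, done comes back empty
        done, tasks = await asyncio.wait(tasks, return_when=asyncio.FIRST_COMPLETED, timeout=timeout)
        for task in done:
            print(task.result())

asyncio.run(drain(["a", "aaaa", "cname"]))
```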
From 278496b39d980ff5c557c6e0647b5f94d5b989af Mon Sep 17 00:00:00 2001
From: github-actions
Date: Sat, 3 Aug 2024 18:11:45 -0400
Subject: [PATCH 06/24] default 5-minute timeout on engine interface

---
 bbot/core/engine.py             | 22 ++++++++++++++++-----
 bbot/core/helpers/dns/engine.py | 34 ++++++++++++++++-----------------
 2 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/bbot/core/engine.py b/bbot/core/engine.py
index e49b1ecfd..ad90af9d4 100644
--- a/bbot/core/engine.py
+++ b/bbot/core/engine.py
@@ -62,15 +62,24 @@ def unpickle(self, binary):
         return error_sentinel
 
     async def _infinite_retry(self, callback, *args, **kwargs):
-        interval = kwargs.pop("_interval", 10)
+        interval = kwargs.pop("_interval", 15)
         context = kwargs.pop("_context", "")
+        # default overall timeout of 5 minutes (15 second interval * 20 iterations)
+        max_retries = kwargs.pop("_max_retries", 4 * 5)
         if not context:
             context = f"{callback.__name__}({args}, {kwargs})"
+        retries = 0
         while not self._shutdown_status:
            try:
                return await asyncio.wait_for(callback(*args, **kwargs), timeout=interval)
-            except (TimeoutError, asyncio.TimeoutError):
+            except TimeoutError:
                 self.log.debug(f"{self.name}: Timeout waiting for response for {context}, retrying...")
+                retries += 1
+                if max_retries is not None and retries > max_retries:
+                    self.log.error(f"{self.name}: Timed out after {max_retries:,} waiting for {context}")
+                    raise TimeoutError(
+                        f"Timed out after {max_retries:,} {interval:,}-second iterations waiting for {context}"
+                    )


 class EngineClient(EngineBase):
@@ -205,7 +214,9 @@ async def send_cancel_message(self, socket, context):
         message = pickle.dumps({"c": -1})
         await self._infinite_retry(socket.send, message)
         while 1:
-            response = await self._infinite_retry(socket.recv, _context=f"waiting for CANCEL_OK from {context}")
+            response = await self._infinite_retry(
+                socket.recv, _context=f"waiting for CANCEL_OK from {context}", _max_retries=4
+            )
             response = pickle.loads(response)
             if isinstance(response, dict):
                 response = response.get("m", "")
@@ -549,8 +560,9 @@ async def finished_tasks(self, client_id, timeout=None):
                 for task in child_tasks:
                     task.cancel()
             else:
-                self.log.error(f"{self.name}: Unhandled exception in finished_tasks({child_tasks}): {e}")
-                self.log.trace(traceback.format_exc())
+                if not in_exception_chain(e, (KeyboardInterrupt, asyncio.CancelledError)):
+                    self.log.error(f"{self.name}: Unhandled exception in finished_tasks({child_tasks}): {e}")
+                    self.log.trace(traceback.format_exc())
                 raise
         self.child_tasks[client_id] = pending
         return done
diff --git a/bbot/core/helpers/dns/engine.py b/bbot/core/helpers/dns/engine.py
index 1d7e87229..6e174f540 100644
--- a/bbot/core/helpers/dns/engine.py
+++ b/bbot/core/helpers/dns/engine.py
@@ -119,8 +119,8 @@ async def resolve(self, query, **kwargs):
                 for _, host in extract_targets(answer):
                     results.add(host)
         except BaseException:
-            log.trace(f"Caught exception in resolve({query}, {kwargs}):")
-            log.trace(traceback.format_exc())
+            self.log.trace(f"Caught exception in resolve({query}, {kwargs}):")
+            self.log.trace(traceback.format_exc())
             raise
 
         self.debug(f"Results for {query} with kwargs={kwargs}: {results}")
@@ -165,8 +165,8 @@ async def resolve_raw(self, query, **kwargs):
             else:
                 return await self._resolve_hostname(query, rdtype=rdtype, **kwargs)
         except BaseException:
-            log.trace(f"Caught exception in resolve_raw({query}, {kwargs}):")
-            log.trace(traceback.format_exc())
+            self.log.trace(f"Caught exception in resolve_raw({query}, {kwargs}):")
+            self.log.trace(traceback.format_exc())
             raise
 
     async def _resolve_hostname(self, query, **kwargs):
@@ -219,11 +219,11 @@ async def _resolve_hostname(self, query, **kwargs):
                 if error_count >= self.abort_threshold:
                     connectivity = await self._connectivity_check()
                     if connectivity:
-                        log.verbose(
+                        self.log.verbose(
                             f'Aborting query "{query}" because failed {rdtype} queries for "{parent}" ({error_count:,}) exceeded abort threshold ({self.abort_threshold:,})'
                         )
                         if parent_hash not in self._dns_warnings:
-                            log.verbose(
+                            self.log.verbose(
                                 f'Aborting future {rdtype} queries to "{parent}" because error count ({error_count:,}) exceeded abort threshold ({self.abort_threshold:,})'
                             )
                             self._dns_warnings.add(parent_hash)
@@ -257,7 +257,7 @@ async def _resolve_hostname(self, query, **kwargs):
                     self.debug(err_msg)
                     self.debug(f"Retry (#{retry_num}) resolving {query} with kwargs={kwargs}")
                 else:
-                    log.verbose(err_msg)
+                    self.log.verbose(err_msg)
 
         if results:
             self._last_dns_success = time.time()
@@ -423,13 +423,13 @@ async def _catch(self, callback, *args, **kwargs):
         except dns.resolver.NoNameservers:
             raise
         except (dns.exception.Timeout, dns.resolver.LifetimeTimeout, TimeoutError):
-            log.debug(f"DNS query with args={args}, kwargs={kwargs} timed out after {self.timeout} seconds")
+            self.log.debug(f"DNS query with args={args}, kwargs={kwargs} timed out after {self.timeout} seconds")
             raise
         except dns.exception.DNSException as e:
             self.debug(f"{e} (args={args}, kwargs={kwargs})")
         except Exception as e:
-            log.warning(f"Error in {callback.__qualname__}() with args={args}, kwargs={kwargs}: {e}")
-            log.trace(traceback.format_exc())
+            self.log.warning(f"Error in {callback.__qualname__}() with args={args}, kwargs={kwargs}: {e}")
+            self.log.trace(traceback.format_exc())
         return []
 
     async def is_wildcard(self, query, ips=None, rdtype=None):
@@ -531,7 +531,7 @@ async def is_wildcard(self, query, ips=None, rdtype=None):
                     base_query_rdtypes = set(query_baseline)
                     wildcard_rdtypes_set = set([k for k, v in result.items() if v[0] is True])
                     if base_query_rdtypes and wildcard_rdtypes_set and base_query_rdtypes == wildcard_rdtypes_set:
-                        log.debug(
+                        self.log.debug(
                             f"Breaking from wildcard detection for {query} at {host} because base query rdtypes ({base_query_rdtypes}) == wildcard rdtypes ({wildcard_rdtypes_set})"
                         )
                         raise DNSWildcardBreak()
@@ -582,7 +582,7 @@ async def is_wildcard_domain(self, domain, log_info=False):
                 wildcard_domain_results[host] = self._wildcard_cache[host_hash]
                 continue
 
-            log.verbose(f"Checking if {host} is a wildcard")
+            self.log.verbose(f"Checking if {host} is a wildcard")
 
             # determine if this is a wildcard domain
@@ -612,12 +612,12 @@ async def is_wildcard_domain(self, domain, log_info=False):
             wildcard_domain_results.update({host: wildcard_results})
             if is_wildcard:
                 wildcard_rdtypes_str = ",".join(sorted([t.upper() for t, r in wildcard_results.items() if r]))
-                log_fn = log.verbose
+                log_fn = self.log.verbose
                 if log_info:
-                    log_fn = log.info
+                    log_fn = self.log.info
                 log_fn(f"Encountered domain with wildcard DNS ({wildcard_rdtypes_str}): {host}")
             else:
-                log.verbose(f"Finished checking {host}, it is not a wildcard")
+                self.log.verbose(f"Finished checking {host}, it is not a wildcard")
 
         return wildcard_domain_results
@@ -653,14 +653,14 @@ async def _connectivity_check(self, interval=5):
             self._last_dns_success = time.time()
             return True
         if time.time() - self._last_connectivity_warning > interval:
-            log.warning(f"DNS queries are failing, please check your internet connection")
+            self.log.warning(f"DNS queries are failing, please check your internet connection")
             self._last_connectivity_warning = time.time()
         self._errors.clear()
         return False
 
     def debug(self, *args, **kwargs):
         if self._debug:
-            log.trace(*args, **kwargs)
+            self.log.trace(*args, **kwargs)
 
     @property
     def in_tests(self):
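The arithmetic behind the new default: an `_interval` of 15 seconds times `_max_retries` of 4 * 5 = 20 attempts gives an overall budget of about 5 minutes before `_infinite_retry` gives up. A self-contained sketch of the same bounded-retry shape (the names here are illustrative, not BBOT's actual API):

```python
import asyncio

async def bounded_retry(callback, *args, interval=15, max_retries=20):
    retries = 0
    while True:
        try:
            # each attempt gets its own timeout window
            return await asyncio.wait_for(callback(*args), timeout=interval)
        except (TimeoutError, asyncio.TimeoutError):  # distinct classes before Python 3.11
            retries += 1
            if retries > max_retries:
                # 20 retries * 15 seconds = 300 seconds (5 minutes) total
                raise TimeoutError(f"gave up after {retries * interval:,} seconds")
```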
From eb326e0b0baf996221d8da707175ceef09f83ee4 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Sat, 3 Aug 2024 18:33:19 -0400
Subject: [PATCH 07/24] LFU cache for wildcard checks

---
 bbot/core/helpers/dns/dns.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/bbot/core/helpers/dns/dns.py b/bbot/core/helpers/dns/dns.py
index 7e347ed69..247fe9f8a 100644
--- a/bbot/core/helpers/dns/dns.py
+++ b/bbot/core/helpers/dns/dns.py
@@ -3,6 +3,7 @@
 import dns.exception
 import dns.asyncresolver
 from radixtarget import RadixTarget
+from cachetools import cached, LFUCache
 
 from bbot.errors import DNSError
 from bbot.core.engine import EngineClient
@@ -111,6 +112,7 @@ def brute(self):
             self._brute = DNSBrute(self.parent_helper)
         return self._brute
 
+    @cached(cache=LFUCache(maxsize=1000))
     async def is_wildcard(self, query, ips=None, rdtype=None):
         """
         Use this method to check whether a *host* is a wildcard entry
@@ -156,6 +158,7 @@ async def is_wildcard(self, query, ips=None, rdtype=None):
 
         return await self.run_and_return("is_wildcard", query=query, ips=ips, rdtype=rdtype)
 
+    @cached(cache=LFUCache(maxsize=1000))
     async def is_wildcard_domain(self, domain, log_info=False):
         domain = self._wildcard_prevalidation(domain)
         if not domain:

From d9815d33a5d47641a19b32d373c174a4ba390222 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Sat, 3 Aug 2024 19:20:35 -0400
Subject: [PATCH 08/24] LFU cache for DNS engine

---
 bbot/core/helpers/async_helpers.py | 24 +++++++++++++++++++++++-
 bbot/core/helpers/dns/dns.py       | 10 +++++++---
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/bbot/core/helpers/async_helpers.py b/bbot/core/helpers/async_helpers.py
index f19b0f343..722a63d40 100644
--- a/bbot/core/helpers/async_helpers.py
+++ b/bbot/core/helpers/async_helpers.py
@@ -2,9 +2,10 @@
 import random
 import asyncio
 import logging
+import functools
 from datetime import datetime
-from cachetools import LRUCache
 from .misc import human_timedelta
+from cachetools import keys, LRUCache
 from contextlib import asynccontextmanager
 
 log = logging.getLogger("bbot.core.helpers.async_helpers")
@@ -105,3 +106,24 @@ def async_to_sync_gen(async_gen):
             yield loop.run_until_complete(async_gen.__anext__())
     except StopAsyncIteration:
         pass
+
+
+def async_cachedmethod(cache, key=keys.hashkey):
+    def decorator(method):
+        async def wrapper(self, *args, **kwargs):
+            method_cache = cache(self)
+            k = key(*args, **kwargs)
+            try:
+                return method_cache[k]
+            except KeyError:
+                pass
+            ret = await method(self, *args, **kwargs)
+            try:
+                method_cache[k] = ret
+            except ValueError:
+                pass
+            return ret
+
+        return functools.wraps(method)(wrapper)
+
+    return decorator
diff --git a/bbot/core/helpers/dns/dns.py b/bbot/core/helpers/dns/dns.py
index 247fe9f8a..2f77ce081 100644
--- a/bbot/core/helpers/dns/dns.py
+++ b/bbot/core/helpers/dns/dns.py
@@ -2,11 +2,12 @@
 import logging
 import dns.exception
 import dns.asyncresolver
+from cachetools import LFUCache
 from radixtarget import RadixTarget
-from cachetools import cached, LFUCache
 
 from bbot.errors import DNSError
 from bbot.core.engine import EngineClient
+from bbot.core.helpers.async_helpers import async_cachedmethod
 from ..misc import clean_dns_record, is_ip, is_domain, is_dns_name
 from .engine import DNSEngine
@@ -80,6 +81,9 @@ def __init__(self, parent_helper):
         # brute force helper
         self._brute = None
 
+        self._is_wildcard_cache = LFUCache(maxsize=1000)
+        self._is_wildcard_domain_cache = LFUCache(maxsize=1000)
+
     async def resolve(self, query, **kwargs):
         return await self.run_and_return("resolve", query=query, **kwargs)
 
@@ -112,7 +116,7 @@ def brute(self):
             self._brute = DNSBrute(self.parent_helper)
         return self._brute
 
-    @cached(cache=LFUCache(maxsize=1000))
+    @async_cachedmethod(lambda self: self._is_wildcard_cache)
     async def is_wildcard(self, query, ips=None, rdtype=None):
         """
         Use this method to check whether a *host* is a wildcard entry
@@ -158,7 +162,7 @@ async def is_wildcard(self, query, ips=None, rdtype=None):
 
         return await self.run_and_return("is_wildcard", query=query, ips=ips, rdtype=rdtype)
 
-    @cached(cache=LFUCache(maxsize=1000))
+    @async_cachedmethod(lambda self: self._is_wildcard_domain_cache)
     async def is_wildcard_domain(self, domain, log_info=False):
         domain = self._wildcard_prevalidation(domain)
         if not domain:
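A note on the switch from `cachetools.cached` (previous commit) to the new `async_cachedmethod`: a plain `@cached` around a coroutine function caches the un-awaited coroutine object, which can only be awaited once, so the second cache hit blows up; that is likely the motivation for replacing it here. The new decorator awaits the method and caches its result against a per-instance cache instead. A usage sketch mirroring the `dns.py` change:

```python
import asyncio
from cachetools import LFUCache
from bbot.core.helpers.async_helpers import async_cachedmethod

class Resolver:
    def __init__(self):
        # one cache per instance, same shape as DNSHelper._is_wildcard_cache
        self._lookup_cache = LFUCache(maxsize=1000)
        self.calls = 0

    @async_cachedmethod(lambda self: self._lookup_cache)
    async def lookup(self, host):
        self.calls += 1  # the expensive work runs once per unique host
        return f"resolved:{host}"

async def main():
    r = Resolver()
    await r.lookup("evilcorp.com")
    await r.lookup("evilcorp.com")
    assert r.calls == 1  # second call was served from the LFU cache

asyncio.run(main())
```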
From 522e39c3681d47cfcd7bf3c93552ddf625203dc1 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Sat, 3 Aug 2024 19:33:54 -0400
Subject: [PATCH 09/24] clearer log messages

---
 bbot/core/engine.py | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/bbot/core/engine.py b/bbot/core/engine.py
index ad90af9d4..ea8cbd96d 100644
--- a/bbot/core/engine.py
+++ b/bbot/core/engine.py
@@ -73,13 +73,10 @@ async def _infinite_retry(self, callback, *args, **kwargs):
             try:
                 return await asyncio.wait_for(callback(*args, **kwargs), timeout=interval)
             except TimeoutError:
-                self.log.debug(f"{self.name}: Timeout waiting for response for {context}, retrying...")
+                self.log.debug(f"{self.name}: Timeout after {interval:,} seconds {context}, retrying...")
                 retries += 1
                 if max_retries is not None and retries > max_retries:
-                    self.log.error(f"{self.name}: Timed out after {max_retries:,} waiting for {context}")
-                    raise TimeoutError(
-                        f"Timed out after {max_retries:,} {interval:,}-second iterations waiting for {context}"
-                    )
+                    raise TimeoutError(f"Timed out after {max_retries*interval:,} seconds {context}")

From 55b6d3cf0e4dee94701d15c51f73f8a9002c192f Mon Sep 17 00:00:00 2001
From: github-actions
Date: Sun, 4 Aug 2024 02:29:22 -0400
Subject: [PATCH 10/24] timeout errors

---
 bbot/core/engine.py             | 12 ++++++------
 bbot/core/helpers/dns/engine.py |  4 +++-
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/bbot/core/engine.py b/bbot/core/engine.py
index ea8cbd96d..b2a9cdd7e 100644
--- a/bbot/core/engine.py
+++ b/bbot/core/engine.py
@@ -72,8 +72,8 @@ async def _infinite_retry(self, callback, *args, **kwargs):
         while not self._shutdown_status:
             try:
                 return await asyncio.wait_for(callback(*args, **kwargs), timeout=interval)
-            except TimeoutError:
-                self.log.debug(f"{self.name}: Timeout after {interval:,} seconds {context}, retrying...")
+            except (TimeoutError, asyncio.exceptions.TimeoutError):
+                self.log.debug(f"{self.name}: Timeout after {interval:,} seconds{context}, retrying...")
                 retries += 1
                 if max_retries is not None and retries > max_retries:
                     raise TimeoutError(f"Timed out after {max_retries*interval:,} seconds {context}")
@@ -224,9 +224,9 @@ async def send_shutdown_message(self):
         async with self.new_socket() as socket:
             # -99 == special shutdown message
             message = pickle.dumps({"c": -99})
-            with suppress(TimeoutError, asyncio.TimeoutError):
+            with suppress(TimeoutError, asyncio.exceptions.TimeoutError):
                 await asyncio.wait_for(socket.send(message), 0.5)
-            with suppress(TimeoutError, asyncio.TimeoutError):
+            with suppress(TimeoutError, asyncio.exceptions.TimeoutError):
                 while 1:
                     response = await asyncio.wait_for(socket.recv(), 0.5)
                     response = pickle.loads(response)
@@ -551,7 +551,7 @@ async def finished_tasks(self, client_id, timeout=None):
         try:
             done, pending = await asyncio.wait(child_tasks, return_when=asyncio.FIRST_COMPLETED, timeout=timeout)
         except BaseException as e:
-            if isinstance(e, (TimeoutError, asyncio.TimeoutError)):
+            if isinstance(e, (TimeoutError, asyncio.exceptions.TimeoutError)):
                 done = set()
                 self.log.warning(f"{self.name}: Timeout after {timeout:,} seconds in finished_tasks({child_tasks})")
                 for task in child_tasks:
                     task.cancel()
@@ -583,7 +583,7 @@ async def cancel_task(self, client_id):
     async def _cancel_task(self, task):
         try:
             await asyncio.wait_for(task, timeout=10)
-        except (TimeoutError, asyncio.TimeoutError):
+        except (TimeoutError, asyncio.exceptions.TimeoutError):
             self.log.debug(f"{self.name}: Timeout cancelling task")
             return
         except (KeyboardInterrupt, asyncio.CancelledError):
diff --git a/bbot/core/helpers/dns/engine.py b/bbot/core/helpers/dns/engine.py
index 6e174f540..6840d5506 100644
--- a/bbot/core/helpers/dns/engine.py
+++ b/bbot/core/helpers/dns/engine.py
@@ -239,6 +239,7 @@ async def _resolve_hostname(self, query, **kwargs):
                 dns.exception.Timeout,
                 dns.resolver.LifetimeTimeout,
                 TimeoutError,
+                asyncio.exceptions.TimeoutError,
             ) as e:
                 try:
                     self._errors[parent_hash] += 1
@@ -307,10 +308,11 @@ async def _resolve_ip(self, query, **kwargs):
                 self._dns_cache[dns_cache_hash] = results
                 break
             except (
+                dns.resolver.NoNameservers,
                 dns.exception.Timeout,
                 dns.resolver.LifetimeTimeout,
-                dns.resolver.NoNameservers,
                 TimeoutError,
+                asyncio.exceptions.TimeoutError,
             ) as e:
                 errors.append(e)
                 # don't retry if we get a SERVFAIL
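Background on the `asyncio.exceptions.TimeoutError` additions above: starting with Python 3.11, `asyncio.TimeoutError` is an alias of the builtin `TimeoutError`, but on 3.10 and earlier they are distinct classes, so a bare `except TimeoutError:` silently misses asyncio-level timeouts there. A quick way to check the interpreter you're on:

```python
import sys
import asyncio

# True on Python >= 3.11, False on 3.10 and earlier;
# hence the except/suppress clauses above list both exception types
print(sys.version_info[:2], asyncio.TimeoutError is TimeoutError)
```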
From 471e159badf556e6e9abad9338d7a585d6efe14a Mon Sep 17 00:00:00 2001
From: github-actions
Date: Sun, 4 Aug 2024 11:43:21 -0400
Subject: [PATCH 11/24] support presets= in python API

---
 bbot/modules/dnsbrute_mutations.py       |  2 +-
 bbot/scanner/preset/preset.py            |  9 +++++++++
 bbot/test/test_step_1/test_presets.py    | 13 +++++++++++++
 bbot/test/test_step_1/test_python_api.py |  4 ++++
 docs/dev/dev_environment.md              |  2 ++
 docs/dev/index.md                        | 12 ++++++++++++
 mkdocs.yml                               |  1 +
 7 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/bbot/modules/dnsbrute_mutations.py b/bbot/modules/dnsbrute_mutations.py
index 2087ccb15..78513fc2d 100644
--- a/bbot/modules/dnsbrute_mutations.py
+++ b/bbot/modules/dnsbrute_mutations.py
@@ -106,7 +106,7 @@ def add_mutation(m):
 
                 # skip if there's hardly any mutations
                 if len(mutations) < 10:
-                    self.debug(
+                    self.verbose(
                         f"Skipping {len(mutations):,} mutations against {domain} because there are less than 10"
                     )
                     break
diff --git a/bbot/scanner/preset/preset.py b/bbot/scanner/preset/preset.py
index d2d55ad39..9e8242ea9 100644
--- a/bbot/scanner/preset/preset.py
+++ b/bbot/scanner/preset/preset.py
@@ -97,6 +97,7 @@ def __init__(
         config=None,
         module_dirs=None,
         include=None,
+        presets=None,
         output_dir=None,
         scan_name=None,
         name=None,
@@ -125,6 +126,7 @@ def __init__(
             module_dirs (list[str], optional): additional directories to load modules from.
             config (dict, optional): Additional scan configuration settings.
             include (list[str], optional): names or filenames of other presets to include.
+            presets (list[str], optional): an alias for `include`.
             output_dir (str or Path, optional): Directory to store scan output. Defaults to BBOT home directory (`~/.bbot`).
             scan_name (str, optional): Human-readable name of the scan. If not specified, it will be random, e.g. "demonic_jimmy".
             name (str, optional): Human-readable name of the preset. Used mainly for logging.
@@ -240,6 +242,13 @@ def __init__(
 
         self._target = None
 
+        # "presets" is alias to "include"
+        if presets and include:
+            raise ValueError(
+                'Cannot use both "presets" and "include" args at the same time (presets is only an alias to include). Please pick only one :)'
+            )
+        if presets and not include:
+            include = presets
         # include other presets
         if include and not isinstance(include, (list, tuple, set)):
             include = [include]
diff --git a/bbot/test/test_step_1/test_presets.py b/bbot/test/test_step_1/test_presets.py
index 89c523df0..768ee3429 100644
--- a/bbot/test/test_step_1/test_presets.py
+++ b/bbot/test/test_step_1/test_presets.py
@@ -725,6 +725,7 @@ def test_preset_include():
         """
     )
 
+    # with include=
     preset = Preset(include=[str(custom_preset_dir_1 / "preset1")])
     assert preset.config.modules.testpreset1.test == "asdf"
     assert preset.config.modules.testpreset2.test == "fdsa"
@@ -732,6 +733,18 @@ def test_preset_include():
     assert preset.config.modules.testpreset4.test == "zxcv"
     assert preset.config.modules.testpreset5.test == "hjkl"
 
+    # same thing but with presets= (an alias to include)
+    preset = Preset(presets=[str(custom_preset_dir_1 / "preset1")])
+    assert preset.config.modules.testpreset1.test == "asdf"
+    assert preset.config.modules.testpreset2.test == "fdsa"
+    assert preset.config.modules.testpreset3.test == "qwerty"
+    assert preset.config.modules.testpreset4.test == "zxcv"
+    assert preset.config.modules.testpreset5.test == "hjkl"
+
+    # can't use both include= and presets= at the same time
+    with pytest.raises(ValueError):
+        preset = Preset(presets=["subdomain-enum"], include=["dirbust-light"])
+
 
 @pytest.mark.asyncio
 async def test_preset_conditions():
diff --git a/bbot/test/test_step_1/test_python_api.py b/bbot/test/test_step_1/test_python_api.py
index 678593ed1..1c2b0bb51 100644
--- a/bbot/test/test_step_1/test_python_api.py
+++ b/bbot/test/test_step_1/test_python_api.py
@@ -56,6 +56,10 @@ async def test_python_api():
     events = [e async for e in custom_target_scan.async_start()]
     assert 1 == len([e for e in events if e.type == "ORG_STUB" and e.data == "evilcorp" and "target" in e.tags])
 
+    # presets
+    scan6 = Scanner("evilcorp.com", presets=["subdomain-enum"])
+    assert "sslcert" in scan6.preset.modules
+
 
 def test_python_api_sync():
     from bbot.scanner import Scanner
diff --git a/docs/dev/dev_environment.md b/docs/dev/dev_environment.md
index 054656150..fd4d5ea94 100644
--- a/docs/dev/dev_environment.md
+++ b/docs/dev/dev_environment.md
@@ -1,5 +1,7 @@
 ## Setting Up a Dev Environment
 
+The following will show you how to set up a fully functioning python environment for devving on BBOT.
+
 ### Installation (Poetry)
 
 [Poetry](https://python-poetry.org/) is the recommended method of installation if you want to dev on BBOT. To set up a dev environment with Poetry, you can follow these steps:
diff --git a/docs/dev/index.md b/docs/dev/index.md
index 8a29e48a7..6315637f0 100644
--- a/docs/dev/index.md
+++ b/docs/dev/index.md
@@ -4,6 +4,18 @@
 
 BBOT exposes a Python API that allows you to create, start, and stop scans.
 
 Documented in this section are commonly-used classes and functions within BBOT, along with usage examples.
 
+## Adding BBOT to Your Python Project
+
+If you are using Poetry, you can add BBOT to your python environment like this:
+
+```bash
+# stable
+poetry add bbot
+
+# bleeding-edge (dev branch)
+poetry add bbot --allow-prereleases
+```
+
 ## Running a BBOT Scan from Python
 
 #### Synchronous
diff --git a/mkdocs.yml b/mkdocs.yml
index c154fb87f..da055c574 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -38,6 +38,7 @@ nav:
     - Troubleshooting: troubleshooting.md
   - Developer Manual:
     - Development Overview: dev/index.md
+    - How to Set Up a BBOT Dev Environment: dev/dev_environment.md
     - BBOT Internal Architecture: dev/architecture.md
     - How to Write a BBOT Module: dev/module_howto.md
     - Unit Tests: dev/tests.md
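For reference, the new keyword flows from `Scanner` through to `Preset.__init__`, where it is treated purely as an alias for `include`. A usage sketch based on the tests above:

```python
from bbot.scanner import Preset, Scanner

# "presets" is an alias for "include"
scan = Scanner("evilcorp.com", presets=["subdomain-enum"])
assert "sslcert" in scan.preset.modules

# supplying both spellings at once is rejected
try:
    Preset(presets=["subdomain-enum"], include=["dirbust-light"])
except ValueError as e:
    print(e)
```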
From a7bcea8bd89eddf9c568a62aa23759df838757ae Mon Sep 17 00:00:00 2001
From: github-actions
Date: Mon, 5 Aug 2024 09:39:50 -0400
Subject: [PATCH 12/24] fix keyboardinterrupt errors

---
 bbot/core/engine.py | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/bbot/core/engine.py b/bbot/core/engine.py
index b2a9cdd7e..6a5998bd6 100644
--- a/bbot/core/engine.py
+++ b/bbot/core/engine.py
@@ -401,11 +401,12 @@ async def run_and_return(self, client_id, command_fn, *args, **kwargs):
             try:
                 result = await command_fn(*args, **kwargs)
             except BaseException as e:
-                error = f"Error in {self.name}.{fn_str}: {e}"
-                self.log.debug(error)
-                trace = traceback.format_exc()
-                self.log.debug(trace)
-                result = {"_e": (error, trace)}
+                if not in_exception_chain(e, (KeyboardInterrupt, asyncio.CancelledError)):
+                    error = f"Error in {self.name}.{fn_str}: {e}"
+                    self.log.debug(error)
+                    trace = traceback.format_exc()
+                    self.log.debug(trace)
+                    result = {"_e": (error, trace)}
             finally:
                 self.tasks.pop(client_id, None)
                 self.log.debug(f"{self.name}: Sending response to {fn_str}: {result}")
@@ -428,12 +429,13 @@ async def run_and_yield(self, client_id, command_fn, *args, **kwargs):
                 self.log.debug(f"{self.name}: sending iteration for {command_fn.__name__}(): {_}")
                 await self.send_socket_multipart(client_id, _)
         except BaseException as e:
-            error = f"Error in {self.name}.{fn_str}: {e}"
-            trace = traceback.format_exc()
-            self.log.debug(error)
-            self.log.debug(trace)
-            result = {"_e": (error, trace)}
-            await self.send_socket_multipart(client_id, result)
+            if not in_exception_chain(e, (KeyboardInterrupt, asyncio.CancelledError)):
+                error = f"Error in {self.name}.{fn_str}: {e}"
+                trace = traceback.format_exc()
+                self.log.debug(error)
+                self.log.debug(trace)
+                result = {"_e": (error, trace)}
+                await self.send_socket_multipart(client_id, result)
         finally:
             self.log.debug(f"{self.name} reached end of run-and-yield iteration for {command_fn.__name__}()")
             # _s == special signal that means StopIteration

From 38d544fef6fd6310a317159d66d5e0b423ef1c38 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Mon, 5 Aug 2024 13:10:11 -0400
Subject: [PATCH 13/24] raw dns tweak

---
 bbot/core/event/base.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py
index bcf41a37c..e4b089327 100644
--- a/bbot/core/event/base.py
+++ b/bbot/core/event/base.py
@@ -1436,7 +1436,8 @@ class FILESYSTEM(DictPathEvent):
 
 
 class RAW_DNS_RECORD(DictHostEvent):
-    pass
+    # don't emit raw DNS records for affiliates
+    _always_emit_tags = ["target"]
 
 
 def make_event(
From 56eef29305446a4325f3aa0483c212ee70aeb528 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Mon, 5 Aug 2024 17:18:08 -0400
Subject: [PATCH 14/24] fix ctrl+c error

---
 bbot/cli.py         | 7 -------
 bbot/core/engine.py | 6 ++++--
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/bbot/cli.py b/bbot/cli.py
index 47b2c29ad..877f2bcaa 100755
--- a/bbot/cli.py
+++ b/bbot/cli.py
@@ -24,13 +24,6 @@
     www.blacklanternsecurity.com/bbot
 """
     print(ascii_art, file=sys.stderr)
-    log_to_stderr(
-        "This is a pre-release of BBOT 2.0. If you upgraded from version 1, we recommend cleaning your old configs etc. before running this version!",
-        level="WARNING",
-    )
-    log_to_stderr(
-        "For details, see https://github.com/blacklanternsecurity/bbot/discussions/1540", level="WARNING"
-    )
 
     scan_name = ""
 
diff --git a/bbot/core/engine.py b/bbot/core/engine.py
index 6a5998bd6..70652d456 100644
--- a/bbot/core/engine.py
+++ b/bbot/core/engine.py
@@ -398,6 +398,7 @@ async def run_and_return(self, client_id, command_fn, *args, **kwargs):
         with self.client_id_context(client_id):
             try:
                 self.log.debug(f"{self.name} run-and-return {fn_str}")
+                result = error_sentinel
                 try:
                     result = await command_fn(*args, **kwargs)
                 except BaseException as e:
@@ -409,8 +410,9 @@ async def run_and_return(self, client_id, command_fn, *args, **kwargs):
                         result = {"_e": (error, trace)}
                 finally:
                     self.tasks.pop(client_id, None)
-                    self.log.debug(f"{self.name}: Sending response to {fn_str}: {result}")
-                    await self.send_socket_multipart(client_id, result)
+                    if result is not error_sentinel:
+                        self.log.debug(f"{self.name}: Sending response to {fn_str}: {result}")
+                        await self.send_socket_multipart(client_id, result)
             except BaseException as e:
                 self.log.critical(
                     f"Unhandled exception in {self.name}.run_and_return({client_id}, {command_fn}, {args}, {kwargs}): {e}"
                 )
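The `error_sentinel` guard added above is the classic sentinel-object pattern: since `None` (or any other value) can be a legitimate command result, only an identity check against a unique object can distinguish "the command never produced a result" (for example, it was interrupted) from "it produced a falsy one". In miniature:

```python
# a unique object that no command can ever legitimately return
error_sentinel = object()

def run(command):
    result = error_sentinel
    try:
        result = command()
    except KeyboardInterrupt:
        pass  # interrupted before any result existed
    finally:
        if result is not error_sentinel:  # identity check, not equality
            print("sending response:", result)

run(lambda: None)  # None is a real result, so it still gets sent
```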
From 82dd0f4037bf1f1bb651acae01a1a8465361dff2 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Mon, 5 Aug 2024 20:48:24 -0400
Subject: [PATCH 15/24] dns discovery path troubleshooting

---
 bbot/core/event/base.py             |  10 +-
 bbot/core/helpers/async_helpers.py  |   2 +-
 bbot/modules/internal/dnsresolve.py | 218 ++++++++++++++++------------
 bbot/scanner/scanner.py             |  10 --
 bbot/test/test_step_1/test_dns.py   |  12 +-
 5 files changed, 146 insertions(+), 106 deletions(-)

diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py
index e4b089327..65dfc60e1 100644
--- a/bbot/core/event/base.py
+++ b/bbot/core/event/base.py
@@ -350,8 +350,10 @@ def discovery_path(self):
         """
         This event's full discovery context, including those of all its parents
         """
-        full_event_chain = list(reversed(self.get_parents())) + [self]
-        return [[e.id, e.discovery_context] for e in full_event_chain if e.type != "SCAN"]
+        parent_path = []
+        if self.parent is not None and self != self.parent:
+            parent_path = self.parent.discovery_path
+        return parent_path + [[self.id, self.discovery_context]]
 
     @property
     def words(self):
@@ -870,6 +872,10 @@ class SCAN(BaseEvent):
     def _data_human(self):
         return f"{self.data['name']} ({self.data['id']})"
 
+    @property
+    def discovery_path(self):
+        return []
+
 
 class FINISHED(BaseEvent):
     """
diff --git a/bbot/core/helpers/async_helpers.py b/bbot/core/helpers/async_helpers.py
index 722a63d40..123385135 100644
--- a/bbot/core/helpers/async_helpers.py
+++ b/bbot/core/helpers/async_helpers.py
@@ -34,7 +34,7 @@ class NamedLock:
     E.g. simultaneous DNS lookups on the same hostname
     """
 
-    def __init__(self, max_size=1000):
+    def __init__(self, max_size=10000):
         self._cache = LRUCache(maxsize=max_size)
 
     @asynccontextmanager
diff --git a/bbot/modules/internal/dnsresolve.py b/bbot/modules/internal/dnsresolve.py
index fcf7e90af..bd342497c 100644
--- a/bbot/modules/internal/dnsresolve.py
+++ b/bbot/modules/internal/dnsresolve.py
@@ -64,12 +64,7 @@ async def filter_event(self, event):
         return True
 
     async def handle_event(self, event, **kwargs):
-        dns_tags = set()
-        event_whitelisted = False
-        event_blacklisted = False
-
         event_is_ip = self.helpers.is_ip(event.host)
-
         event_host = str(event.host)
         event_host_hash = hash(event_host)
@@ -92,89 +87,9 @@ async def handle_event(self, event, **kwargs):
                 #     dns_tags, dns_children, event_whitelisted, event_blacklisted = self._event_cache[event_host_hash]
             except KeyError:
-                main_host_event = self.get_dns_parent(event)
-
-                rdtypes_to_resolve = ()
-                if event_is_ip:
-                    if not self.minimal:
-                        rdtypes_to_resolve = ("PTR",)
-                else:
-                    if self.minimal:
-                        rdtypes_to_resolve = ("A", "AAAA", "CNAME")
-                    else:
-                        rdtypes_to_resolve = all_rdtypes
-
-                # if missing from cache, do DNS resolution
-                queries = [(event_host, rdtype) for rdtype in rdtypes_to_resolve]
-                error_rdtypes = []
-                raw_record_events = []
-                async for (query, rdtype), (answer, errors) in self.helpers.dns.resolve_raw_batch(queries):
-                    if self.emit_raw_records and rdtype not in ("A", "AAAA", "CNAME", "PTR"):
-                        raw_record_event = self.make_event(
-                            {"host": str(event_host), "type": rdtype, "answer": answer.to_text()},
-                            "RAW_DNS_RECORD",
-                            parent=main_host_event,
-                            tags=[f"{rdtype.lower()}-record"],
-                            context=f"{rdtype} lookup on {{event.parent.host}} produced {{event.type}}",
-                        )
-                        raw_record_events.append(raw_record_event)
-                    if errors:
-                        error_rdtypes.append(rdtype)
-                    for _rdtype, host in extract_targets(answer):
-                        dns_tags.add(f"{rdtype.lower()}-record")
-                        try:
-                            main_host_event.dns_children[_rdtype].add(host)
-                        except KeyError:
-                            main_host_event.dns_children[_rdtype] = {host}
-
-                # if there were dns resolution errors, notify the user with tags
-                for rdtype in error_rdtypes:
-                    if rdtype not in main_host_event.dns_children:
-                        dns_tags.add(f"{rdtype.lower()}-error")
-
-                # if there weren't any DNS children and it's not an IP address, tag as unresolved
-                if not main_host_event.dns_children and not event_is_ip:
-                    dns_tags.add("unresolved")
-
-                # check DNS children against whitelists and blacklists
-                for rdtype, children in main_host_event.dns_children.items():
-                    if event_blacklisted:
-                        break
-                    for host in children:
-                        # whitelisting / blacklisting based on resolved hosts
-                        if rdtype in ("A", "AAAA", "CNAME"):
-                            # having a CNAME to an in-scope resource doesn't make you in-scope
-                            if (not event_whitelisted) and rdtype != "CNAME":
-                                with suppress(ValidationError):
-                                    if self.scan.whitelisted(host):
-                                        event_whitelisted = True
-                                        dns_tags.add(f"dns-whitelisted-{rdtype.lower()}")
-                            # CNAME to a blacklisted resource, means you're blacklisted
-                            with suppress(ValidationError):
-                                if self.scan.blacklisted(host):
-                                    dns_tags.add("blacklisted")
-                                    dns_tags.add(f"dns-blacklisted-{rdtype.lower()}")
-                                    event_blacklisted = True
-                                    event_whitelisted = False
-                                    break
-
-                        # check for private IPs
-                        try:
-                            ip = ipaddress.ip_address(host)
-                            if ip.is_private:
-                                dns_tags.add("private-ip")
-                        except ValueError:
-                            continue
-
-                # add DNS tags to main host
-                for tag in dns_tags:
-                    main_host_event.add_tag(tag)
-
-                # set resolved_hosts attribute
-                for rdtype, children in main_host_event.dns_children.items():
-                    if rdtype in ("A", "AAAA", "CNAME"):
-                        for host in children:
-                            main_host_event._resolved_hosts.add(host)
+                main_host_event, dns_tags, event_whitelisted, event_blacklisted, raw_record_events = (
+                    await self.resolve_event(event)
+                )
 
             # if we're not blacklisted, emit the main host event and all its raw records
             if not event_blacklisted:
@@ -202,8 +117,19 @@ async def handle_event(self, event, **kwargs):
             if not self.minimal:
                 in_dns_scope = -1 < event.scope_distance < self._dns_search_distance
                 for rdtype, records in main_host_event.dns_children.items():
-                    module = self.scan._make_dummy_module_dns(rdtype)
+                    module = self._make_dummy_module(rdtype)
                     for record in records:
+                        parents = main_host_event.get_parents()
+                        for e in parents:
+                            e_is_host = e.type in ("DNS_NAME", "IP_ADDRESS")
+                            e_parent_matches = str(e.parent.host) == str(main_host_event.host)
+                            e_host_matches = str(e.data) == str(record)
+                            e_module_matches = str(e.module) == str(module)
+                            if e_is_host and e_parent_matches and e_host_matches and e_module_matches:
+                                self.critical(
+                                    f"TRYING TO EMIT ALREADY-EMITTED {record}:{rdtype} CHILD OF {main_host_event}, parents: {parents}"
+                                )
+                                return
                         try:
                             child_event = self.scan.make_event(
                                 record, "DNS_NAME", module=module, parent=main_host_event
                             )
@@ -240,6 +166,99 @@ async def handle_event(self, event, **kwargs):
         if event.type == "DNS_NAME" and "unresolved" in event.tags:
             event.type = "DNS_NAME_UNRESOLVED"
 
+    async def resolve_event(self, event):
+        dns_tags = set()
+        event_whitelisted = False
+        event_blacklisted = False
+
+        main_host_event = self.get_dns_parent(event)
+        event_host = str(event.host)
+        event_is_ip = self.helpers.is_ip(event.host)
+
+        rdtypes_to_resolve = ()
+        if event_is_ip:
+            if not self.minimal:
+                rdtypes_to_resolve = ("PTR",)
+        else:
+            if self.minimal:
+                rdtypes_to_resolve = ("A", "AAAA", "CNAME")
+            else:
+                rdtypes_to_resolve = all_rdtypes
+
+        # if missing from cache, do DNS resolution
+        queries = [(event_host, rdtype) for rdtype in rdtypes_to_resolve]
+        error_rdtypes = []
+        raw_record_events = []
+        async for (query, rdtype), (answer, errors) in self.helpers.dns.resolve_raw_batch(queries):
+            if self.emit_raw_records and rdtype not in ("A", "AAAA", "CNAME", "PTR"):
+                raw_record_event = self.make_event(
+                    {"host": str(event_host), "type": rdtype, "answer": answer.to_text()},
+                    "RAW_DNS_RECORD",
+                    parent=main_host_event,
+                    tags=[f"{rdtype.lower()}-record"],
+                    context=f"{rdtype} lookup on {{event.parent.host}} produced {{event.type}}",
+                )
+                raw_record_events.append(raw_record_event)
+            if errors:
+                error_rdtypes.append(rdtype)
+            dns_tags.add(f"{rdtype.lower()}-record")
+            for _rdtype, host in extract_targets(answer):
+                try:
+                    main_host_event.dns_children[_rdtype].add(host)
+                except KeyError:
+                    main_host_event.dns_children[_rdtype] = {host}
+
+        # if there were dns resolution errors, notify the user with tags
+        for rdtype in error_rdtypes:
+            if rdtype not in main_host_event.dns_children:
+                dns_tags.add(f"{rdtype.lower()}-error")
+
+        # if there weren't any DNS children and it's not an IP address, tag as unresolved
+        if not main_host_event.dns_children and not event_is_ip:
+            dns_tags.add("unresolved")
+
+        # check DNS children against whitelists and blacklists
+        for rdtype, children in main_host_event.dns_children.items():
+            if event_blacklisted:
+                break
+            for host in children:
+                # whitelisting / blacklisting based on resolved hosts
+                if rdtype in ("A", "AAAA", "CNAME"):
+                    # having a CNAME to an in-scope resource doesn't make you in-scope
+                    if (not event_whitelisted) and rdtype != "CNAME":
+                        with suppress(ValidationError):
+                            if self.scan.whitelisted(host):
+                                event_whitelisted = True
+                                dns_tags.add(f"dns-whitelisted-{rdtype.lower()}")
+                    # CNAME to a blacklisted resource, means you're blacklisted
+                    with suppress(ValidationError):
+                        if self.scan.blacklisted(host):
+                            dns_tags.add("blacklisted")
+                            dns_tags.add(f"dns-blacklisted-{rdtype.lower()}")
+                            event_blacklisted = True
+                            event_whitelisted = False
+                            break
+
+                # check for private IPs
+                try:
+                    ip = ipaddress.ip_address(host)
+                    if ip.is_private:
+                        dns_tags.add("private-ip")
+                except ValueError:
+                    continue
+
+        # add DNS tags to main host
+        for tag in dns_tags:
+            main_host_event.add_tag(tag)
+
+        # set resolved_hosts attribute
+        for rdtype, children in main_host_event.dns_children.items():
+            if rdtype in ("A", "AAAA", "CNAME"):
+                for host in children:
+                    main_host_event._resolved_hosts.add(host)
+
+        return main_host_event, dns_tags, event_whitelisted, event_blacklisted, raw_record_events
+
     async def handle_wildcard_event(self, event):
         self.debug(f"Entering handle_wildcard_event({event})")
         try:
@@ -299,3 +318,24 @@ def get_dns_parent(self, event):
             context="{event.parent.type} has host {event.type}: {event.host}",
             tags=tags,
         )
+
+    def _make_dummy_module(self, name):
+        try:
+            dummy_module = self.scan.dummy_modules[name]
+        except KeyError:
+            dummy_module = self.scan._make_dummy_module(name=name, _type="DNS")
+            dummy_module._priority = 4
+            dummy_module.suppress_dupes = False
+            self.scan.dummy_modules[name] = dummy_module
+        return dummy_module
+
+    def _dns_child_dedup_hash(self, parent_host, host, rdtype):
+        # we deduplicate NS records by their parent domain
+        # because otherwise every DNS_NAME has one, and it gets super messy
+        if rdtype == "NS":
+            _, parent_domain = self.helpers.split_domain(parent_host)
+            return hash(f"{parent_domain}:{host}")
+        return hash(f"{parent_host}:{host}:{rdtype}")
+
+    def _main_outgoing_dedup_hash(self, event):
+        return hash(f"{event.host}")
diff --git a/bbot/scanner/scanner.py b/bbot/scanner/scanner.py
index fa2abfadf..4e1d5a104 100644
--- a/bbot/scanner/scanner.py
+++ b/bbot/scanner/scanner.py
@@ -1208,16 +1208,6 @@ def _make_dummy_module(self, name, _type="scan"):
             self.dummy_modules[name] = dummy
         return dummy
 
-    def _make_dummy_module_dns(self, name):
-        try:
-            dummy_module = self.dummy_modules[name]
-        except KeyError:
-            dummy_module = self._make_dummy_module(name=name, _type="DNS")
-            dummy_module.suppress_dupes = False
-            dummy_module._priority = 4
-            self.dummy_modules[name] = dummy_module
-        return dummy_module
-
 
 from bbot.modules.base import BaseModule
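The `_dns_child_dedup_hash` helper added above keys most child records by parent host, child host, and record type, but collapses NS records to their parent domain, so sibling subdomains don't each re-emit the same nameserver. A rough standalone equivalent (`split_domain` below is a simplified stand-in for `helpers.split_domain`):

```python
def split_domain(host):
    # simplified stand-in: "www.evilcorp.com" -> ("www", "evilcorp.com")
    parts = host.split(".")
    return ".".join(parts[:-2]), ".".join(parts[-2:])

def dns_child_dedup_hash(parent_host, host, rdtype):
    # NS records are deduplicated per parent domain, everything else per parent host
    if rdtype == "NS":
        _, parent_domain = split_domain(parent_host)
        return hash(f"{parent_domain}:{host}")
    return hash(f"{parent_host}:{host}:{rdtype}")

a = dns_child_dedup_hash("www.evilcorp.com", "ns1.evilcorp.com", "NS")
b = dns_child_dedup_hash("mail.evilcorp.com", "ns1.evilcorp.com", "NS")
assert a == b  # one NS child for the whole evilcorp.com family
```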
scan.make_event("1.2.3.4", module=scan._make_dummy_module_dns("PTR"), parent=event_distance_1) + event_distance_2 = scan.make_event( + "1.2.3.4", module=scan.modules["dnsresolve"]._make_dummy_module("PTR"), parent=event_distance_1 + ) assert event_distance_2.dns_resolve_distance == 1 event_distance_3 = scan.make_event( - "evilcorp.org", module=scan._make_dummy_module_dns("A"), parent=event_distance_2 + "evilcorp.org", module=scan.modules["dnsresolve"]._make_dummy_module("A"), parent=event_distance_2 ) assert event_distance_3.dns_resolve_distance == 2 From 20abca10645ab7bf64d9f12ae4677701ec1bfde2 Mon Sep 17 00:00:00 2001 From: github-actions Date: Mon, 5 Aug 2024 21:26:19 -0400 Subject: [PATCH 16/24] fix dns discovery path duplication --- bbot/modules/internal/dnsresolve.py | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/bbot/modules/internal/dnsresolve.py b/bbot/modules/internal/dnsresolve.py index bd342497c..6dbf1c5eb 100644 --- a/bbot/modules/internal/dnsresolve.py +++ b/bbot/modules/internal/dnsresolve.py @@ -1,6 +1,6 @@ import ipaddress from contextlib import suppress -from cachetools import LRUCache +from cachetools import LFUCache from bbot.errors import ValidationError from bbot.core.helpers.dns.engine import all_rdtypes @@ -10,6 +10,13 @@ class DNSResolve(InterceptModule): + """ + TODO: + - scrap event cache in favor of the parent backtracking method + - don't duplicate resolution on the same host + - clean up wildcard checking to only happen once, and re-emit/abort if one is detected + """ + watched_events = ["*"] _priority = 1 scope_distance_modifier = None @@ -36,7 +43,7 @@ async def setup(self): self._emit_raw_records = None # event resolution cache - self._event_cache = LRUCache(maxsize=10000) + self._event_cache = LFUCache(maxsize=10000) self._event_cache_locks = NamedLock() self.host_module = self.HostModule(self.scan) @@ -91,8 +98,9 @@ async def handle_event(self, event, **kwargs): await self.resolve_event(event) ) - # if we're not blacklisted, emit the main host event and all its raw records - if not event_blacklisted: + # if we're not blacklisted and we haven't already done it, emit the main host event and all its raw records + main_host_resolved = getattr(main_host_event, "_resolved", False) + if not event_blacklisted and not main_host_resolved: if event_whitelisted: self.debug( f"Making {main_host_event} in-scope because it resolves to an in-scope resource (A/AAAA)" @@ -126,7 +134,7 @@ async def handle_event(self, event, **kwargs): e_host_matches = str(e.data) == str(record) e_module_matches = str(e.module) == str(module) if e_is_host and e_parent_matches and e_host_matches and e_module_matches: - self.critical( + self.trace( f"TRYING TO EMIT ALREADY-EMITTED {record}:{rdtype} CHILD OF {main_host_event}, parents: {parents}" ) return @@ -146,6 +154,9 @@ async def handle_event(self, event, **kwargs): f'Event validation failed for DNS child of {main_host_event}: "{record}" ({rdtype}): {e}' ) + # mark the host as resolved + main_host_event._resolved = True + # store results in cache self._event_cache[event_host_hash] = main_host_event, dns_tags, event_whitelisted, event_blacklisted From 9949408fdc48f058a4d5befaa18b21eaf2a83a59 Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 7 Aug 2024 07:59:59 -0400 Subject: [PATCH 17/24] DNS todo --- bbot/modules/internal/dnsresolve.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bbot/modules/internal/dnsresolve.py b/bbot/modules/internal/dnsresolve.py index 6dbf1c5eb..0877c3aa7 100644 
--- a/bbot/modules/internal/dnsresolve.py
+++ b/bbot/modules/internal/dnsresolve.py
@@ -15,6 +15,7 @@ class DNSResolve(InterceptModule):
     - scrap event cache in favor of the parent backtracking method
     - don't duplicate resolution on the same host
     - clean up wildcard checking to only happen once, and re-emit/abort if one is detected
+    - same thing with main_host_event. we should never be processing two events - only one.
     """
 
     watched_events = ["*"]

From 3e78bf08d7d50e1a40bf46ecbf69eacdae31c3ef Mon Sep 17 00:00:00 2001
From: github-actions
Date: Wed, 7 Aug 2024 10:17:41 -0400
Subject: [PATCH 18/24] remove old link

---
 README.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/README.md b/README.md
index 76b5541e0..b88ab4c6b 100644
--- a/README.md
+++ b/README.md
@@ -330,8 +330,6 @@ For details, see [Configuration](https://www.blacklanternsecurity.com/bbot/Stabl
 
 ## Documentation
 
-Also see: [Bleeding-Edge Documentation (`dev` branch)](https://www.blacklanternsecurity.com/bbot/dev_branch)
-
 - **User Manual**
     - **Basics**

From 40d2d10fe479167e75e89727553c4ff7b895cd09 Mon Sep 17 00:00:00 2001
From: TheTechromancer <20261699+TheTechromancer@users.noreply.github.com>
Date: Wed, 7 Aug 2024 10:41:13 -0700
Subject: [PATCH 19/24] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index b88ab4c6b..51e7a5300 100644
--- a/README.md
+++ b/README.md
@@ -66,7 +66,7 @@ config:
 
 </details>
 
-BBOT consistently finds 20-50% more subdomains than other tools. The bigger the domain, the bigger the difference. To learn how this is possible, see [How It Works](https://www.blacklanternsecurity.com/bbot/Stable/how_it_works/).
+BBOT consistently finds 20-50% more subdomains than other tools. The bigger the domain, the bigger the difference. To learn how this is possible, see [How It Works](https://www.blacklanternsecurity.com/bbot/Dev/how_it_works/).
 
 ![subdomain-stats-ebay](https://github.com/blacklanternsecurity/bbot/assets/20261699/de3e7f21-6f52-4ac4-8eab-367296cd385f)

From fdf983846eedfc95fb21b5d78a7b2fe3aebe55fe Mon Sep 17 00:00:00 2001
From: github-actions
Date: Thu, 8 Aug 2024 04:23:29 -0400
Subject: [PATCH 20/24] fix context formatting error

---
 bbot/core/event/base.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py
index 65dfc60e1..bda4eae22 100644
--- a/bbot/core/event/base.py
+++ b/bbot/core/event/base.py
@@ -340,9 +340,10 @@ def discovery_context(self):
     @discovery_context.setter
     def discovery_context(self, context):
         try:
+            context = context.replace("{", "{{").replace("}", "}}")
             self._discovery_context = context.format(module=self.module, event=self)
         except Exception as e:
-            log.warning(f"Error formatting discovery context for {self}: {e} (context: '{context}')")
+            log.trace(f"Error formatting discovery context for {self}: {e} (context: '{context}')")
             self._discovery_context = context

From 999399b20584d4b9294045c771279d0523a65e0f Mon Sep 17 00:00:00 2001
From: github-actions
Date: Thu, 8 Aug 2024 04:24:58 -0400
Subject: [PATCH 21/24] cleaned context

---
 bbot/core/event/base.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py
index bda4eae22..a2d6b4b0b 100644
--- a/bbot/core/event/base.py
+++ b/bbot/core/event/base.py
@@ -340,8 +340,8 @@ def discovery_context(self):
     @discovery_context.setter
     def discovery_context(self, context):
         try:
-            context = context.replace("{", "{{").replace("}", "}}")
-            self._discovery_context = context.format(module=self.module, event=self)
+            cleaned_context = context.replace("{", "{{").replace("}", "}}")
+            self._discovery_context = cleaned_context.format(module=self.module, event=self)
         except Exception as e:
             log.trace(f"Error formatting discovery context for {self}: {e} (context: '{context}')")
             self._discovery_context = context

From 0b0f03c09cbf12799877dced03cbbcacc89fb793 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Thu, 8 Aug 2024 08:44:32 -0400
Subject: [PATCH 22/24] fix discovery path, write tests

---
 bbot/core/event/base.py              |  8 ++++++--
 bbot/test/bbot_fixtures.py           | 12 ++++++------
 bbot/test/test_step_1/test_events.py |  7 +++++++
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py
index a2d6b4b0b..9024049f9 100644
--- a/bbot/core/event/base.py
+++ b/bbot/core/event/base.py
@@ -169,6 +169,7 @@ def __init__(
         self._resolved_hosts = set()
         self.dns_children = dict()
         self._discovery_context = ""
+        self._discovery_context_regex = re.compile(r'\{(?:event|module)[^}]*\}')
         self.web_spider_distance = 0
 
         # for creating one-off events without enforcing parent requirement
@@ -339,9 +340,12 @@ def discovery_context(self):
 
     @discovery_context.setter
     def discovery_context(self, context):
+        def replace(match):
+            s = match.group()
+            return s.format(module=self.module, event=self)
+
         try:
-            cleaned_context = context.replace("{", "{{").replace("}", "}}")
-            self._discovery_context = cleaned_context.format(module=self.module, event=self)
+            self._discovery_context = self._discovery_context_regex.sub(replace, context)
         except Exception as e:
             log.trace(f"Error formatting discovery context for {self}: {e} (context: '{context}')")
             self._discovery_context = context
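The progression across patches 20-22: escaping every brace made `.format()` crash-proof but also neutralized the intended `{event...}`/`{module...}` placeholders, while the regex approach formats only recognized placeholders and passes hostile input (braces inside URLs, etc.) through untouched. A condensed illustration of the final behavior, with a throwaway `Ctx` object standing in for a real event:

```python
import re

regex = re.compile(r"\{(?:event|module)[^}]*\}")

class Ctx:
    host = "evilcorp.com"

def render(template, module="dummy_module", event=Ctx()):
    # format only recognized placeholders; stray braces pass through unchanged
    return regex.sub(lambda m: m.group().format(module=module, event=event), template)

print(render("{module} discovered {event.host}"))        # dummy_module discovered evilcorp.com
print(render("http://evilcorp.com/{not-a-placeholder}"))  # left exactly as-is
```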
diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py
index 1c9631fac..86110a6cb 100644
--- a/bbot/test/bbot_fixtures.py
+++ b/bbot/test/bbot_fixtures.py
@@ -208,9 +208,9 @@ class bbot_events:
     return bbot_events
 
 
-@pytest.fixture(scope="session", autouse=True)
-def install_all_python_deps():
-    deps_pip = set()
-    for module in DEFAULT_PRESET.module_loader.preloaded().values():
-        deps_pip.update(set(module.get("deps", {}).get("pip", [])))
-    subprocess.run([sys.executable, "-m", "pip", "install"] + list(deps_pip))
+# @pytest.fixture(scope="session", autouse=True)
+# def install_all_python_deps():
+#     deps_pip = set()
+#     for module in DEFAULT_PRESET.module_loader.preloaded().values():
+#         deps_pip.update(set(module.get("deps", {}).get("pip", [])))
+#     subprocess.run([sys.executable, "-m", "pip", "install"] + list(deps_pip))
diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py
index 4f42c1bb0..792eb946d 100644
--- a/bbot/test/test_step_1/test_events.py
+++ b/bbot/test/test_step_1/test_events.py
@@ -587,9 +587,16 @@ async def handle_event(self, event):
 
     scan.modules["dummy_module"] = dummy_module
 
+    # test discovery context
     test_event = dummy_module.make_event("evilcorp.com", "DNS_NAME", parent=scan.root_event)
     assert test_event.discovery_context == "dummy_module discovered DNS_NAME: evilcorp.com"
 
+    test_event2 = dummy_module.make_event("evilcorp.com", "DNS_NAME", parent=scan.root_event, context="{module} {found} {event.host}")
+    assert test_event2.discovery_context == "dummy_module {found} evilcorp.com"
+    # jank input
+    test_event3 = dummy_module.make_event("http://evilcorp.com/{http://evilcorp.org!@#%@#$:,,,}", "URL_UNVERIFIED", parent=scan.root_event)
+    assert test_event3.discovery_context == "dummy_module discovered URL_UNVERIFIED: http://evilcorp.com/{http:/evilcorp.org!@"
+
     events = [e async for e in scan.async_start()]
     assert len(events) == 6

From 43651fd47e0762fa6100cc11639d49b4049ca72f Mon Sep 17 00:00:00 2001
From: github-actions
Date: Thu, 8 Aug 2024 08:44:51 -0400
Subject: [PATCH 23/24] blacked

---
 bbot/core/event/base.py              |  2 +-
 bbot/test/bbot_fixtures.py           | 12 ++++++------
 bbot/test/test_step_1/test_events.py | 13 ++++++++++---
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py
index 9024049f9..a8e55701e 100644
--- a/bbot/core/event/base.py
+++ b/bbot/core/event/base.py
@@ -169,7 +169,7 @@ def __init__(
         self._resolved_hosts = set()
         self.dns_children = dict()
         self._discovery_context = ""
-        self._discovery_context_regex = re.compile(r'\{(?:event|module)[^}]*\}')
+        self._discovery_context_regex = re.compile(r"\{(?:event|module)[^}]*\}")
         self.web_spider_distance = 0
 
         # for creating one-off events without enforcing parent requirement
diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py
index 86110a6cb..1c9631fac 100644
--- a/bbot/test/bbot_fixtures.py
+++ b/bbot/test/bbot_fixtures.py
@@ -208,9 +208,9 @@ class bbot_events:
     return bbot_events
 
 
-# @pytest.fixture(scope="session", autouse=True)
-# def install_all_python_deps():
-#     deps_pip = set()
-#     for module in DEFAULT_PRESET.module_loader.preloaded().values():
-#         deps_pip.update(set(module.get("deps", {}).get("pip", [])))
-#     subprocess.run([sys.executable, "-m", "pip", "install"] + list(deps_pip))
+@pytest.fixture(scope="session", autouse=True)
+def install_all_python_deps():
+    deps_pip = set()
+    for module in DEFAULT_PRESET.module_loader.preloaded().values():
+        deps_pip.update(set(module.get("deps", {}).get("pip", [])))
+    subprocess.run([sys.executable, "-m", "pip", "install"] + list(deps_pip))
diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py
From 43651fd47e0762fa6100cc11639d49b4049ca72f Mon Sep 17 00:00:00 2001
From: github-actions
Date: Thu, 8 Aug 2024 08:44:51 -0400
Subject: [PATCH 23/24] blacked

---
 bbot/core/event/base.py              |  2 +-
 bbot/test/bbot_fixtures.py           | 12 ++++++------
 bbot/test/test_step_1/test_events.py | 13 ++++++++++---
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py
index 9024049f9..a8e55701e 100644
--- a/bbot/core/event/base.py
+++ b/bbot/core/event/base.py
@@ -169,7 +169,7 @@ def __init__(
         self._resolved_hosts = set()
         self.dns_children = dict()
         self._discovery_context = ""
-        self._discovery_context_regex = re.compile(r'\{(?:event|module)[^}]*\}')
+        self._discovery_context_regex = re.compile(r"\{(?:event|module)[^}]*\}")
         self.web_spider_distance = 0
 
         # for creating one-off events without enforcing parent requirement
diff --git a/bbot/test/bbot_fixtures.py b/bbot/test/bbot_fixtures.py
index 86110a6cb..1c9631fac 100644
--- a/bbot/test/bbot_fixtures.py
+++ b/bbot/test/bbot_fixtures.py
@@ -208,9 +208,9 @@ class bbot_events:
     return bbot_events
 
 
-# @pytest.fixture(scope="session", autouse=True)
-# def install_all_python_deps():
-#     deps_pip = set()
-#     for module in DEFAULT_PRESET.module_loader.preloaded().values():
-#         deps_pip.update(set(module.get("deps", {}).get("pip", [])))
-#     subprocess.run([sys.executable, "-m", "pip", "install"] + list(deps_pip))
+@pytest.fixture(scope="session", autouse=True)
+def install_all_python_deps():
+    deps_pip = set()
+    for module in DEFAULT_PRESET.module_loader.preloaded().values():
+        deps_pip.update(set(module.get("deps", {}).get("pip", [])))
+    subprocess.run([sys.executable, "-m", "pip", "install"] + list(deps_pip))
diff --git a/bbot/test/test_step_1/test_events.py b/bbot/test/test_step_1/test_events.py
index 792eb946d..c319559d4 100644
--- a/bbot/test/test_step_1/test_events.py
+++ b/bbot/test/test_step_1/test_events.py
@@ -591,11 +591,18 @@ async def handle_event(self, event):
     test_event = dummy_module.make_event("evilcorp.com", "DNS_NAME", parent=scan.root_event)
     assert test_event.discovery_context == "dummy_module discovered DNS_NAME: evilcorp.com"
 
-    test_event2 = dummy_module.make_event("evilcorp.com", "DNS_NAME", parent=scan.root_event, context="{module} {found} {event.host}")
+    test_event2 = dummy_module.make_event(
+        "evilcorp.com", "DNS_NAME", parent=scan.root_event, context="{module} {found} {event.host}"
+    )
     assert test_event2.discovery_context == "dummy_module {found} evilcorp.com"
 
     # jank input
-    test_event3 = dummy_module.make_event("http://evilcorp.com/{http://evilcorp.org!@#%@#$:,,,}", "URL_UNVERIFIED", parent=scan.root_event)
-    assert test_event3.discovery_context == "dummy_module discovered URL_UNVERIFIED: http://evilcorp.com/{http:/evilcorp.org!@"
+    test_event3 = dummy_module.make_event(
+        "http://evilcorp.com/{http://evilcorp.org!@#%@#$:,,,}", "URL_UNVERIFIED", parent=scan.root_event
+    )
+    assert (
+        test_event3.discovery_context
+        == "dummy_module discovered URL_UNVERIFIED: http://evilcorp.com/{http:/evilcorp.org!@"
+    )
 
     events = [e async for e in scan.async_start()]
     assert len(events) == 6
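The final patch below introduces a per-event-type `_suppress_chain_dupes` flag, checked in the scan manager before an event is emitted: the manager walks the event's parent chain and refuses to emit if an identical event already appears upstream. A simplified sketch of that check, using stand-in classes rather than BBOT's real Event/manager API:

```python
# Simplified illustration of parent-chain dupe suppression (stand-in classes).
class Event:
    def __init__(self, data, parent=None):
        self.data = data
        self.parent = parent

    def get_parents(self):
        # walk up the discovery chain toward the root
        current = self.parent
        while current is not None:
            yield current
            current = current.parent

    def __eq__(self, other):
        return self.data == other.data

def should_suppress(event):
    # mirrors: if parent == event -> don't emit
    return any(parent == event for parent in event.get_parents())

root = Event("ROOT")
bucket = Event("https://tesla.blob.core.windows.net/", parent=root)
dupe = Event("https://tesla.blob.core.windows.net/", parent=bucket)

assert should_suppress(dupe) is True    # identical event upstream -> drop
assert should_suppress(bucket) is False  # nothing identical upstream -> emit
```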
From 9cad808ff4972e0e36d4c0dd417d3fe135f62761 Mon Sep 17 00:00:00 2001
From: github-actions
Date: Thu, 8 Aug 2024 09:47:25 -0400
Subject: [PATCH 24/24] suppress duplicate storage buckets

---
 bbot/core/event/base.py                           |  4 ++
 bbot/modules/bucket_azure.py                      |  4 ++
 bbot/modules/internal/cloudcheck.py               |  5 ++-
 bbot/modules/templates/bucket.py                  | 19 +++++++-
 bbot/scanner/manager.py                           |  6 +++
 .../module_tests/test_module_bucket_azure.py      | 43 +++++++++++++++++++
 6 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/bbot/core/event/base.py b/bbot/core/event/base.py
index a8e55701e..9a5a9b869 100644
--- a/bbot/core/event/base.py
+++ b/bbot/core/event/base.py
@@ -113,6 +113,9 @@ class BaseEvent:
     _data_validator = None
     # Whether to increment scope distance if the child and parent hosts are the same
     _scope_distance_increment_same_host = False
+    # Don't allow duplicates to occur within a parent chain
+    # In other words, don't emit the event if the same one already exists in its discovery context
+    _suppress_chain_dupes = False
 
     def __init__(
@@ -1176,6 +1179,7 @@ def pretty_string(self):
 
 class STORAGE_BUCKET(DictEvent, URL_UNVERIFIED):
     _always_emit = True
+    _suppress_chain_dupes = True
 
     class _data_validator(BaseModel):
         name: str
diff --git a/bbot/modules/bucket_azure.py b/bbot/modules/bucket_azure.py
index c89034ccb..032e409b4 100644
--- a/bbot/modules/bucket_azure.py
+++ b/bbot/modules/bucket_azure.py
@@ -30,3 +30,7 @@ def check_bucket_exists(self, bucket_name, response):
         status_code = getattr(response, "status_code", 0)
         existent_bucket = status_code != 0
         return existent_bucket, set()
+
+    def clean_bucket_url(self, url):
+        # only return root URL
+        return "/".join(url.split("/")[:3])
diff --git a/bbot/modules/internal/cloudcheck.py b/bbot/modules/internal/cloudcheck.py
index 15d9bf364..9b7b6e147 100644
--- a/bbot/modules/internal/cloudcheck.py
+++ b/bbot/modules/internal/cloudcheck.py
@@ -14,7 +14,9 @@ async def setup(self):
     def make_dummy_modules(self):
         self.dummy_modules = {}
         for provider_name, provider in self.helpers.cloud.providers.items():
-            self.dummy_modules[provider_name] = self.scan._make_dummy_module(f"cloud_{provider_name}", _type="scan")
+            module = self.scan._make_dummy_module(f"cloud_{provider_name}", _type="scan")
+            module.default_discovery_context = "{module} derived {event.type}: {event.host}"
+            self.dummy_modules[provider_name] = module
 
     async def filter_event(self, event):
         if (not event.host) or (event.type in ("IP_RANGE",)):
@@ -27,6 +29,7 @@ async def handle_event(self, event, **kwargs):
             self.make_dummy_modules()
         # cloud tagging by hosts
         hosts_to_check = set(str(s) for s in event.resolved_hosts)
+        # we use the original host, since storage bucket hostnames might be collapsed to _wildcard
         hosts_to_check.add(str(event.host_original))
         for host in hosts_to_check:
             for provider, provider_type, subnet in self.helpers.cloudcheck(host):
diff --git a/bbot/modules/templates/bucket.py b/bbot/modules/templates/bucket.py
index f5a10387f..3b7bde789 100644
--- a/bbot/modules/templates/bucket.py
+++ b/bbot/modules/templates/bucket.py
@@ -53,7 +53,7 @@ async def handle_dns_name(self, event):
                 bucket_name = d.join(split)
                 buckets.add(bucket_name)
         async for bucket_name, url, tags, num_buckets in self.brute_buckets(buckets, permutations=self.permutations):
-            await self.emit_event(
+            await self.emit_storage_bucket(
                 {"name": bucket_name, "url": url},
                 "STORAGE_BUCKET",
                 parent=event,
@@ -79,7 +79,7 @@ async def handle_storage_bucket(self, event):
         async for bucket_name, new_url, tags, num_buckets in self.brute_buckets(
             [bucket_name], permutations=self.permutations, omit_base=True
         ):
-            await self.emit_event(
+            await self.emit_storage_bucket(
                 {"name": bucket_name, "url": new_url},
                 "STORAGE_BUCKET",
                 parent=event,
@@ -87,6 +87,17 @@
+    async def emit_storage_bucket(self, event_data, event_type, parent, tags, context):
+        event_data["url"] = self.clean_bucket_url(event_data["url"])
+        await self.emit_event(
+            event_data,
+            event_type,
+            parent=parent,
+            tags=tags,
+            context=context,
+        )
+
     async def brute_buckets(self, buckets, permutations=False, omit_base=False):
         buckets = set(buckets)
         new_buckets = set(buckets)
@@ -112,6 +123,10 @@ async def brute_buckets(self, buckets, permutations=False, omit_base=False):
                 if existent_bucket:
                     yield bucket_name, url, tags, num_buckets
 
+    def clean_bucket_url(self, url):
+        # if needed, modify the bucket url before emitting it
+        return url
+
     def build_bucket_request(self, bucket_name, base_domain, region):
         url = self.build_url(bucket_name, base_domain, region)
         return url, {}
diff --git a/bbot/scanner/manager.py b/bbot/scanner/manager.py
index cdae044a8..70658e69d 100644
--- a/bbot/scanner/manager.py
+++ b/bbot/scanner/manager.py
@@ -224,6 +224,12 @@ async def handle_event(self, event, **kwargs):
                     self.debug(f"Re-queuing internal event {parent} with parent {event} to prevent graph orphan")
                     await self.emit_event(parent)
 
+        if event._suppress_chain_dupes:
+            for parent in event.get_parents():
+                if parent == event:
+                    return False, f"an identical parent {event} was found, and _suppress_chain_dupes=True"
+
+        # custom callback - abort event emission if it returns true
         abort_result = False
         if callable(abort_if):
             async with self.scan._acatch(context=abort_if):
diff --git a/bbot/test/test_step_2/module_tests/test_module_bucket_azure.py b/bbot/test/test_step_2/module_tests/test_module_bucket_azure.py
index 782a71c32..a3c866c08 100644
--- a/bbot/test/test_step_2/module_tests/test_module_bucket_azure.py
+++ b/bbot/test/test_step_2/module_tests/test_module_bucket_azure.py
@@ -1,4 +1,5 @@
 from .test_module_bucket_amazon import *
+from .base import ModuleTestBase
 
 
 class TestBucket_Azure(Bucket_Amazon_Base):
@@ -11,3 +12,45 @@ def url_setup(self):
         self.url_1 = f"https://{self.random_bucket_1}"
         self.url_2 = f"https://{self.random_bucket_2}"
         self.url_3 = f"https://{self.random_bucket_3}/{random_bucket_name_3}?restype=container"
+
+
+class TestBucket_Azure_NoDup(ModuleTestBase):
+    targets = ["tesla.com"]
+    module_name = "bucket_azure"
+    config_overrides = {"cloudcheck": True}
+
+    async def setup_before_prep(self, module_test):
+        module_test.httpx_mock.add_response(
+            url="https://tesla.blob.core.windows.net/tesla?restype=container",
+            text="",
+        )
+        await module_test.mock_dns(
+            {
+                "tesla.com": {"A": ["1.2.3.4"]},
+                "tesla.blob.core.windows.net": {"A": ["1.2.3.4"]},
+            }
+        )
+
+    def check(self, module_test, events):
+        assert 1 == len([e for e in events if e.type == "STORAGE_BUCKET"])
+        bucket_event = [e for e in events if e.type == "STORAGE_BUCKET"][0]
+        assert bucket_event.data["name"] == "tesla"
+        assert bucket_event.data["url"] == "https://tesla.blob.core.windows.net/"
+        assert (
+            bucket_event.discovery_context
+            == f"bucket_azure tried bucket variations of tesla.com and found STORAGE_BUCKET at {bucket_event.data['url']}"
+        )
+
+
+class TestBucket_Azure_AllowDup(TestBucket_Azure_NoDup):
+    """
+    This tests _suppress_chain_dupes functionality to make sure it works as expected
+    """
+
+    async def setup_after_prep(self, module_test):
+        from bbot.core.event.base import STORAGE_BUCKET
+
+        module_test.monkeypatch.setattr(STORAGE_BUCKET, "_suppress_chain_dupes", False)
+
+    def check(self, module_test, events):
+        assert 2 == len([e for e in events if e.type == "STORAGE_BUCKET"])
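For reference, the root-URL truncation performed by `bucket_azure.clean_bucket_url()` in the patch above can be shown standalone; note the STORAGE_BUCKET event may still normalize the URL afterward (e.g. the trailing slash the test expects):

```python
# Standalone sketch of the root-URL truncation from bucket_azure above.
def clean_bucket_url(url: str) -> str:
    # "https://host/path?query" splits into ["https:", "", "host", "path?query"];
    # keeping the first three parts leaves just scheme + authority
    return "/".join(url.split("/")[:3])

assert (
    clean_bucket_url("https://tesla.blob.core.windows.net/tesla?restype=container")
    == "https://tesla.blob.core.windows.net"
)
```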