diff --git a/README.md b/README.md index 1a08f131c..6c491e278 100644 --- a/README.md +++ b/README.md @@ -147,62 +147,90 @@ $ detect-secrets audit .secrets.baseline ### Usage in Other Python Scripts -**Basic Use:** +The detect-secrets API for Python scripts supports scanning for secrets in strings, files, and Git repositories. It supports scanning with default settings or with advanced settings by providing Plugins and Filters. Git repository scanning allows either all files or only Git-tracked files in a local repository. +**Scanning with Default Settings** ```python -from detect_secrets import SecretsCollection -from detect_secrets.settings import default_settings +#scanning a string with default settings +from detect_secrets.api import scan_string -secrets = SecretsCollection() -with default_settings(): - secrets.scan_file('test_data/config.ini') +string_to_check = "AWS_SECRET_KEY = 'AKIAIOSFODNN7EXAMPLE'" +secrets = scan_string(string=string_to_check) +print(secrets) +#scanning a file with default settings +from detect_secrets.api import scan_file -import json -print(json.dumps(secrets.json(), indent=2)) -``` +secrets = scan_file(filepath='/path/to/file.txt') +print(secrets) + +#scanning a git repo with default settings +from detect_secrets.api import scan_git_repository -**More Advanced Configuration:** +#scanning a git repo with default settings, only git tracked files +secrets = scan_git_repository(repo_path='/path/to/repository') +print(secrets) + +#scanning a git repo with default settings, all files +secrets = scan_git_repository(repo_path='/path/to/repository', scan_all_files=True) +print(secrets) +``` +**Scanning with More Advanced Configurations** ```python -from detect_secrets import SecretsCollection -from detect_secrets.settings import transient_settings - -secrets = SecretsCollection() -with transient_settings({ - # Only run scans with only these plugins. - # This format is the same as the one that is saved in the generated baseline.
- 'plugins_used': [ - # Example of configuring a built-in plugin - { - 'name': 'Base64HighEntropyString', - 'limit': 5.0, - }, - - # Example of using a custom plugin - { - 'name': 'HippoDetector', - 'path': 'file:///Users/aaronloo/Documents/github/detect-secrets/testing/plugins.py', - }, - ], - - # We can also specify whichever additional filters we want. - # This is an example of using the function `is_identified_by_ML_model` within the - # local file `./private-filters/example.py`. - 'filters_used': [ - { - 'path': 'file://private-filters/example.py::is_identified_by_ML_model', - }, - ] -}) as settings: - # If we want to make any further adjustments to the created settings object (e.g. - # disabling default filters), we can do so as such. - settings.disable_filters( - 'detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign', - 'detect_secrets.filters.heuristic.is_likely_id_string', - ) - - secrets.scan_file('test_data/config.ini') +# Only run scans with only these plugins. +# This format is the same as the one that is saved in the generated baseline. +plugins_used = [ + # Example of configuring a built-in plugin + { + 'name': 'Base64HighEntropyString', + 'limit': 5.0, + }, + # Example of using a custom plugin + { + 'name': 'HippoDetector', + 'path': 'file:///Users/aaronloo/Documents/github/detect-secrets/testing/plugins.py', + }, +] + +# We can also specify whichever additional filters we want. +# This is an example of using the function `is_identified_by_ML_model` within the +# local file `./private-filters/example.py`. 
+filters_used = [ + { + 'path': 'file://private-filters/example.py::is_identified_by_ML_model', + }, +] +# get default settings +from detect_secrets.api import get_settings +settings = get_settings() +print(settings) + +# get settings with advanced configuration +settings = get_settings(plugins=plugins_used, filters=filters_used) + +# scanning a string with advanced configuration +from detect_secrets.api import scan_string + +secrets = scan_string(string=string_to_check, plugins=plugins_used, filters=filters_used) +print(secrets) + +# scanning a file with advanced configuration +from detect_secrets.api import scan_file + +secrets = scan_file(filepath='path/to/file', plugins=plugins_used, filters=filters_used) +print(secrets) + +# scanning a git repository with advanced configuration +from detect_secrets.api import scan_git_repository + +# Only Git tracked files +secrets = scan_git_repository(repo_path='path/to/git/repo', plugins=plugins_used, filters=filters_used) +print(secrets) + +# All files +secrets = scan_git_repository(repo_path='path/to/git/repo', scan_all_files=True, plugins=plugins_used, filters=filters_used) +print(secrets) ``` ## Installation
diff --git a/detect_secrets/api.py b/detect_secrets/api.py
new file mode 100644
index 000000000..dbc89b8ba
--- /dev/null
+++ b/detect_secrets/api.py
@@ -0,0 +1,208 @@
+import os
+from typing import ContextManager
+from typing import Dict
+from typing import List
+from typing import Optional
+
+from git import Repo
+from git.exc import InvalidGitRepositoryError
+from git.exc import NoSuchPathError
+
+from detect_secrets import SecretsCollection
+from detect_secrets.settings import default_settings
+from detect_secrets.settings import transient_settings
+
+
+def _validate_list_option(value: object) -> None:
+    """
+    Raise ValueError when a truthy `filters`/`plugins` argument is not a list
+
+    Falsy values (None, empty list) are accepted and mean "use the defaults".
+    """
+    if value and not isinstance(value, list):
+        raise ValueError(f"Error: '{value}' must be List object")
+
+
+def _is_readable_text_file(filepath: str) -> bool:
+    """
+    Return True when `filepath` can be opened and fully decoded as text
+
+    The file is streamed line by line instead of being loaded into memory.
+    """
+    try:
+        with open(filepath, 'r') as f:
+            for _ in f:
+                pass
+    except (OSError, ValueError):
+        # OSError: missing path, directory, permissions.
+        # ValueError: undecodable content (UnicodeDecodeError) or malformed path.
+        return False
+    return True
+
+
+def _scan_settings(filters: Optional[list], plugins: Optional[list]) -> ContextManager:
+    """
+    Build the settings context manager to run a scan with
+
+    Default settings are used when neither option is given. When only one of
+    them is given, the other one is filled in from the default settings.
+    """
+    if not filters and not plugins:
+        return default_settings()
+
+    if not plugins:
+        plugins = get_settings()['plugins']
+    elif not filters:
+        filters = get_settings()['filters']
+
+    return transient_settings({'plugins_used': plugins, 'filters_used': filters})
+
+
+def _scan_readable_file(
+    filepath: str, filters: Optional[list], plugins: Optional[list],
+) -> Dict[str, List]:
+    """
+    Scan a file whose arguments and readability were already checked
+    """
+    secrets = SecretsCollection()
+    with _scan_settings(filters, plugins):
+        secrets.scan_file(filepath)
+    return secrets.json()
+
+
+def _list_repository_files(repo: Repo, repo_path: str, scan_all_files: bool) -> List[str]:
+    """
+    Return the paths to scan: all files outside `.git`, or only Git-tracked files
+    """
+    if not scan_all_files:
+        # Index entries are keyed by (path, stage): de-duplicate conflicted paths.
+        tracked = sorted({str(path) for path, _stage in repo.index.entries})
+        return [os.path.join(repo_path, path) for path in tracked]
+
+    files_to_scan = []
+    for root, dirs, files in os.walk(repo_path):
+        # Prune Git metadata by exact name, so that e.g. `.github` is still scanned.
+        dirs[:] = [name for name in dirs if name != '.git']
+        files_to_scan.extend(os.path.join(root, name) for name in files)
+    return files_to_scan
+
+
+def get_settings(
+    filters: Optional[list] = None, plugins: Optional[list] = None,
+) -> Dict[str, List]:
+    """
+    Return the plugins and filters that a scan would use for the provided params
+
+    :param filters: Custom filters; the default filters are returned when omitted
+    :param plugins: Custom plugins; the default plugins are returned when omitted
+    :return: Dict with a 'plugins' list and a 'filters' list
+    :raises ValueError: If filters or plugins is provided but is not a list
+    """
+    _validate_list_option(filters)
+    _validate_list_option(plugins)
+
+    if filters and plugins:
+        return {'plugins': plugins, 'filters': filters}
+
+    # Enter the default settings only once, and only when a default is needed.
+    with default_settings() as settings:
+        filters_used = filters or [{'path': key} for key in settings.filters]
+        plugins_used = plugins or [{'name': key} for key in settings.plugins]
+
+    return {'plugins': plugins_used, 'filters': filters_used}
+
+
+def scan_string(
+    string: str, filters: Optional[list] = None, plugins: Optional[list] = None,
+) -> Dict[str, List]:
+    """
+    Scan a string for secrets using detect-secrets with custom filters and plugins
+
+    :param string: String to scan
+    :param filters: Custom filters for detect-secrets
+    :param plugins: Custom plugins for detect-secrets
+    :return: Detected secrets, as returned by `SecretsCollection.json()`
+    :raises ValueError: If an argument has the wrong type
+    """
+    if not isinstance(string, str):
+        raise ValueError(f"Error: '{string}' must be 'string' object")
+    _validate_list_option(filters)
+    _validate_list_option(plugins)
+
+    secrets = SecretsCollection()
+    with _scan_settings(filters, plugins):
+        secrets.scan_string(string)
+    return secrets.json()
+
+
+def scan_file(
+    filepath: str, filters: Optional[list] = None, plugins: Optional[list] = None,
+) -> Dict[str, List]:
+    """
+    Scan a file for secrets using detect-secrets with custom filters and plugins
+
+    :param filepath: Path to the file to scan
+    :param filters: Custom filters for detect-secrets
+    :param plugins: Custom plugins for detect-secrets
+    :return: Detected secrets, as returned by `SecretsCollection.json()`
+    :raises ValueError: If an argument has the wrong type or the file is unreadable
+    """
+    if not isinstance(filepath, str):
+        raise ValueError(
+            f"Error: '{filepath}' must be 'string' formatted path to a file",
+        )
+    _validate_list_option(filters)
+    _validate_list_option(plugins)
+
+    if not _is_readable_text_file(filepath):
+        raise ValueError(f"Error: Cannot read '{filepath}'")
+
+    return _scan_readable_file(filepath, filters, plugins)
+
+
+def scan_git_repository(
+    repo_path: str,
+    plugins: Optional[list] = None,
+    filters: Optional[list] = None,
+    scan_all_files: bool = False,
+) -> List[Dict]:
+    """
+    Scan a local Git repository for secrets using the specified plugins and filters
+
+    :param repo_path: Path to the local Git repository
+    :param plugins: Custom plugins for detect-secrets
+    :param filters: Custom filters for detect-secrets
+    :param scan_all_files: Scan all files (True) or only Git-tracked files (False)
+    :return: List with one result dict for each file in which secrets were detected
+    :raises ValueError: If an argument has the wrong type, or if `repo_path` is not
+        a valid Git repository
+    """
+    if not isinstance(scan_all_files, bool):
+        raise ValueError(f"Error: '{scan_all_files}' must be 'bool' type")
+    if not isinstance(repo_path, str):
+        raise ValueError(f"Error: '{repo_path}' must be 'str' type path to repository")
+    # Validate once up front, so that a bad option is not hidden by the file loop.
+    _validate_list_option(filters)
+    _validate_list_option(plugins)
+
+    # Only opening the repository is guarded: failures elsewhere must not be
+    # misreported as an invalid repository.
+    try:
+        repo = Repo(repo_path)
+    except (InvalidGitRepositoryError, NoSuchPathError) as err:
+        raise ValueError(f"Error: '{repo_path}' is not a valid Git repository") from err
+
+    try:
+        files_to_scan = _list_repository_files(repo, repo_path, scan_all_files)
+    finally:
+        repo.close()
+
+    results = []
+    for filepath in files_to_scan:
+        # Binary or otherwise unreadable files are skipped instead of aborting the scan.
+        if not _is_readable_text_file(filepath):
+            continue
+        secrets = _scan_readable_file(filepath, filters, plugins)
+        if secrets:
+            results.append(secrets)
+    return results
diff --git a/detect_secrets/core/secrets_collection.py b/detect_secrets/core/secrets_collection.py index 094a274ed..d789afae0 100644 --- a/detect_secrets/core/secrets_collection.py +++ b/detect_secrets/core/secrets_collection.py @@ -76,6 +76,10 @@ def scan_file(self, filename: str) -> None: for secret in scan.scan_file(os.path.join(self.root, convert_local_os_path(filename))): self[convert_local_os_path(filename)].add(secret) + def scan_string(self, string: str) -> None: + for secret in scan.scan_line(string): + self['adhoc-string-scan'].add(secret) + def scan_diff(self, diff: str) -> None: """ :raises: UnidiffParseError diff --git a/requirements-dev.txt b/requirements-dev.txt index 34964ba6c..68c4b4e46 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -8,6 +8,7 @@ distlib==0.3.8 filelock==3.14.0 flake8==7.0.0 gibberish-detector==0.1.1 +GitPython==3.1.43 identify==2.5.36 idna==3.7 iniconfig==2.0.0 diff --git a/tests/api_test.py b/tests/api_test.py new file mode 100644 index
000000000..ef245aaf8
--- /dev/null
+++ b/tests/api_test.py
@@ -0,0 +1,271 @@
+import pytest
+from git import Repo
+
+from detect_secrets.api import get_settings
+from detect_secrets.api import scan_file
+from detect_secrets.api import scan_git_repository
+from detect_secrets.api import scan_string
+
+
+class TestScanString:
+    @staticmethod
+    def test_basic():
+        assert scan_string('AKIATESTTESTTESTTEST') == {
+            'adhoc-string-scan': [
+                {
+                    'type': 'AWS Access Key',
+                    'filename': 'adhoc-string-scan',
+                    'hashed_secret': '874e6e498dcfe2ad53452e2b12ec336fca465408',
+                    'is_verified': False,
+                },
+                {
+                    'type': 'Base64 High Entropy String',
+                    'filename': 'adhoc-string-scan',
+                    'hashed_secret': '874e6e498dcfe2ad53452e2b12ec336fca465408',
+                    'is_verified': False,
+                },
+            ],
+        }
+
+    @staticmethod
+    def test_with_plugins():
+        plugins_used = [
+            {
+                'name': 'AWSKeyDetector',
+            },
+            {
+                'name': 'PrivateKeyDetector',
+            },
+        ]
+        assert scan_string('AKIATESTTESTTESTTEST', plugins=plugins_used) == {
+            'adhoc-string-scan': [
+                {
+                    'type': 'AWS Access Key',
+                    'filename': 'adhoc-string-scan',
+                    'hashed_secret': '874e6e498dcfe2ad53452e2b12ec336fca465408',
+                    'is_verified': False,
+                },
+            ],
+        }
+
+    @staticmethod
+    def test_with_filters():
+        filters_used = [{'path': 'detect-secrets.testing.plugins.hippodetector'}]
+        assert scan_string('No Secret', filters=filters_used) == {
+            'adhoc-string-scan': [
+                {
+                    'type': 'Hex High Entropy String',
+                    'filename': 'adhoc-string-scan',
+                    'hashed_secret': '58e6b3a414a1e090dfc6029add0f3555ccba127f',
+                    'is_verified': False,
+                },
+                {
+                    'type': 'Hex High Entropy String',
+                    'filename': 'adhoc-string-scan',
+                    'hashed_secret': '7dd84750ee8571116cd2b06f62f56f472df8bf0a',
+                    'is_verified': False,
+                },
+                {
+                    'type': 'Base64 High Entropy String',
+                    'filename': 'adhoc-string-scan',
+                    'hashed_secret': '816c52fd2bdd94a63cd0944823a6c0aa9384c103',
+                    'is_verified': False,
+                },
+                {
+                    'type': 'Base64 High Entropy String',
+                    'filename': 'adhoc-string-scan',
+                    'hashed_secret': 'f4e7a8740db0b7a0bfd8e63077261475f61fc2a6',
+                    'is_verified': False,
+                },
+            ],
+        }
+
+    @staticmethod
+    def test_invalid_plugins():
+        plugins = 'String'
+        with pytest.raises(ValueError, match=f"Error: '{plugins}' must be List object"):
+            scan_string('No Secret!', plugins=plugins)
+
+    @staticmethod
+    def test_invalid_filters():
+        filters = {'key': 'value'}
+        with pytest.raises(
+            ValueError,
+            match=f"Error: '{filters}' must be List object",
+        ):
+            scan_string('No Secret!', filters=filters)
+
+    @staticmethod
+    def test_invalid_string():
+        scan_to_string = 12345678
+        with pytest.raises(
+            ValueError,
+            match=f"Error: '{scan_to_string}' must be 'string' object",
+        ):
+            scan_string(scan_to_string)
+
+
+class TestScanFile:
+    @staticmethod
+    def test_basic(tmp_path):
+        temp_file_path = str(tmp_path / 'scan-target')
+        with open(temp_file_path, 'wb') as temp_file:
+            temp_file.write(b"AWS_SECRET_KEY = 'AKIAIOSFODNN7EXAMPLE'\nNo secrets here")
+        assert scan_file(temp_file_path) == {
+            temp_file_path: [
+                {
+                    'type': 'AWS Access Key',
+                    'filename': temp_file_path,
+                    'hashed_secret': '25910f981e85ca04baf359199dd0bd4a3ae738b6',
+                    'is_verified': False,
+                    'line_number': 1,
+                },
+                {
+                    'type': 'Secret Keyword',
+                    'filename': temp_file_path,
+                    'hashed_secret': '25910f981e85ca04baf359199dd0bd4a3ae738b6',
+                    'is_verified': False,
+                    'line_number': 1,
+                },
+            ],
+        }
+
+    @staticmethod
+    def test_with_plugins(tmp_path):
+        plugins_used = [
+            {
+                'name': 'AWSKeyDetector',
+            },
+            {
+                'name': 'PrivateKeyDetector',
+            },
+        ]
+        temp_file_path = str(tmp_path / 'scan-target')
+        with open(temp_file_path, 'wb') as temp_file:
+            temp_file.write(b"AWS_SECRET_KEY = 'AKIAIOSFODNN7EXAMPLE'\nNo secrets here")
+        assert scan_file(temp_file_path, plugins=plugins_used) == {
+            temp_file_path: [
+                {
+                    'type': 'AWS Access Key',
+                    'filename': temp_file_path,
+                    'hashed_secret': '25910f981e85ca04baf359199dd0bd4a3ae738b6',
+                    'is_verified': False,
+                    'line_number': 1,
+                },
+            ],
+        }
+
+    @staticmethod
+    def test_with_filters(tmp_path):
+        filters_used = [{'path': 'detect-secrets.testing.plugins.hippodetector'}]
+        temp_file_path = str(tmp_path / 'scan-target')
+        with open(temp_file_path, 'wb') as temp_file:
+            temp_file.write(b"First Line'\nNo secrets here")
+        assert scan_file(temp_file_path, filters=filters_used) == {}
+
+    @staticmethod
+    def test_invalid_plugins():
+        plugins = 'String'
+        with pytest.raises(ValueError, match=f"Error: '{plugins}' must be List object"):
+            scan_file('temp_file.txt', plugins=plugins)
+
+    @staticmethod
+    def test_invalid_filters():
+        filters = {'key': 'value'}
+        with pytest.raises(
+            ValueError,
+            match=f"Error: '{filters}' must be List object",
+        ):
+            scan_file('temp_file.txt', filters=filters)
+
+    @staticmethod
+    def test_not_existed_file():
+        not_existed_file = 'not_existed_file.txt'
+        with pytest.raises(
+            ValueError,
+            match=f"Error: Cannot read '{not_existed_file}'",
+        ):
+            scan_file(not_existed_file)
+
+    @staticmethod
+    def test_invalid_filepath():
+        file_to_scan = 12345678
+        with pytest.raises(
+            ValueError,
+            match=f"Error: '{file_to_scan}' must be 'string' formatted path to a file",
+        ):
+            scan_file(file_to_scan)
+
+
+class TestScanGitRepo:
+    @staticmethod
+    def test_basic(tmp_path):
+        repo_path = str(tmp_path)
+        Repo.init(repo_path)
+        with open(f'{repo_path}/test-file.txt', 'w') as temp_file:
+            temp_file.write('No Secret')
+        assert scan_git_repository(repo_path) == []
+
+    @staticmethod
+    def test_all_files(tmp_path):
+        repo_path = str(tmp_path)
+        Repo.init(repo_path)
+        with open(f'{repo_path}/test-file.txt', 'w') as temp_file:
+            temp_file.write("AWS_SECRET_KEY = 'AKIAIOSFODNN7EXAMPLE'")
+        assert scan_git_repository(repo_path, scan_all_files=True)
+
+    @staticmethod
+    def test_not_git(tmp_path):
+        repo_path = str(tmp_path)
+        with pytest.raises(ValueError):
+            scan_git_repository(repo_path)
+
+    @staticmethod
+    def test_invalid_all_files_boolean(tmp_path):
+        repo_path = str(tmp_path)
+        with pytest.raises(ValueError, match="Error: 'true' must be 'bool' type"):
+            scan_git_repository(repo_path, scan_all_files='true')
+
+    @staticmethod
+    def test_invalid_repo_path():
+        repo_path = 12345678
+        with pytest.raises(
+            ValueError,
+            match=f"Error: '{repo_path}' must be 'str' type path to repository",
+        ):
+            scan_git_repository(repo_path)
+
+
+class TestGetSettings:
+    @staticmethod
+    def test_get_default_settings():
+        assert get_settings()
+
+    @staticmethod
+    def test_get_settings_with_plugins():
+        plugins_used = [
+            {
+                'name': 'AWSKeyDetector',
+            },
+            {
+                'name': 'PrivateKeyDetector',
+            },
+        ]
+        assert get_settings(plugins=plugins_used)
+
+    @staticmethod
+    def test_get_settings_with_filters():
+        filters_used = [{'path': 'detect-secrets.testing.plugins.hippodetector'}]
+        assert get_settings(filters=filters_used)
+
+    @staticmethod
+    def test_invalid_plugins():
+        plugins = 'String'
+        with pytest.raises(ValueError, match=f"Error: '{plugins}' must be List object"):
+            get_settings(plugins=plugins)
+
+    @staticmethod
+    def test_invalid_filters():
+        filters = 'String'
+        with pytest.raises(ValueError, match=f"Error: '{filters}' must be List object"):
+            get_settings(filters=filters)