From a08f05e5d1ca3427b52c6d59a06c2dda83f5b8f8 Mon Sep 17 00:00:00 2001 From: elayrocks Date: Wed, 26 Jun 2024 08:16:00 -0700 Subject: [PATCH] Azure Function App which runs periodically to aggregate the bytes read per IP address over a window of time (#215) * init * update * update flake8 config * code format changes * fix long lines * fix imports * function app changes * code format changes * test * remove readme * remove redundant packages and revert python version * revert python version * remove redundant type hints * use settings class inherited from baseSettings * change kql * use placeholder for timer schedule * change test parameter * update assertions and logger * remove import * remove dash in table name * update test id * format * add no-integration flag * add dependencies * test * change trigger to run every hour * use azure clients as context manager * add context managers in test * role assignment for function app * change LAW name * change role * change provider's name * change name of LAW * better readability * better readability * format * add logging * UPDATE function settings * suppress mypy warning * update env variables for function app * typo --------- Co-authored-by: elay --- .flake8 | 3 +- deployment/terraform/resources/functions.tf | 31 +++++ deployment/terraform/resources/providers.tf | 6 + deployment/terraform/resources/variables.tf | 21 +++ deployment/terraform/staging/main.tf | 4 + docker-compose.yml | 1 + pc-funcs.dev.env | 4 + pcfuncs/Dockerfile | 1 + pcfuncs/ipban/__init__.py | 29 +++++ pcfuncs/ipban/config.py | 18 +++ pcfuncs/ipban/function.json | 11 ++ pcfuncs/ipban/models.py | 70 ++++++++++ pcfuncs/requirements-deploy.txt | 3 +- pcfuncs/requirements.txt | 3 +- pcfuncs/tests/conftest.py | 29 +++++ pcfuncs/tests/ipban/__init__.py | 0 pcfuncs/tests/ipban/test_ipban.py | 136 ++++++++++++++++++++ scripts/bin/test-funcs | 19 ++- scripts/test | 2 +- 19 files changed, 386 insertions(+), 5 deletions(-) create mode 100644 pcfuncs/ipban/__init__.py create mode 100644 pcfuncs/ipban/config.py create mode 100644 pcfuncs/ipban/function.json create mode 100644 pcfuncs/ipban/models.py create mode 100644 pcfuncs/tests/conftest.py create mode 100644 pcfuncs/tests/ipban/__init__.py create mode 100644 pcfuncs/tests/ipban/test_ipban.py diff --git a/.flake8 b/.flake8 index 419d1104..e034c121 100644 --- a/.flake8 +++ b/.flake8 @@ -4,4 +4,5 @@ extend-ignore = E203, W503 exclude = .git __pycache__ - setup.py \ No newline at end of file + setup.py + .venv \ No newline at end of file diff --git a/deployment/terraform/resources/functions.tf b/deployment/terraform/resources/functions.tf index e77b3082..764a3e45 100644 --- a/deployment/terraform/resources/functions.tf +++ b/deployment/terraform/resources/functions.tf @@ -41,6 +41,11 @@ resource "azurerm_function_app" "pcfuncs" { "IMAGE_OUTPUT_STORAGE_URL" = var.image_output_storage_url, "IMAGE_API_ROOT_URL" = var.funcs_data_api_url, "IMAGE_TILE_REQUEST_CONCURRENCY" = tostring(var.funcs_tile_request_concurrency), + + # IPBan function + "STORAGE_ACCOUNT_URL" = var.func_storage_account_url, + "BANNED_IP_TABLE" = var.banned_ip_table, + "LOG_ANALYTICS_WORKSPACE_ID" = var.prod_log_analytics_workspace_id, } os_type = "linux" @@ -77,3 +82,29 @@ resource "azurerm_role_assignment" "function-app-animation-container-access" { azurerm_function_app.pcfuncs ] } + +resource "azurerm_role_assignment" "function-app-storage-table-data-contributor" { + scope = azurerm_storage_account.pc.id + role_definition_name = "Storage Table Data Contributor" + principal_id = azurerm_function_app.pcfuncs.identity[0].principal_id + + depends_on = [ + azurerm_function_app.pcfuncs + ] +} + +data "azurerm_log_analytics_workspace" "prod_log_analytics_workspace" { + provider = azurerm.planetary_computer_subscription + name = var.prod_log_analytics_workspace_name + resource_group_name = var.pc_resources_rg +} + +resource "azurerm_role_assignment" "function-app-log-analytics-access" { + scope = data.azurerm_log_analytics_workspace.prod_log_analytics_workspace.id + role_definition_name = "Log Analytics Reader" + principal_id = azurerm_function_app.pcfuncs.identity[0].principal_id + + depends_on = [ + azurerm_function_app.pcfuncs + ] +} \ No newline at end of file diff --git a/deployment/terraform/resources/providers.tf b/deployment/terraform/resources/providers.tf index 2404234d..d71323b0 100644 --- a/deployment/terraform/resources/providers.tf +++ b/deployment/terraform/resources/providers.tf @@ -8,6 +8,12 @@ provider "azurerm" { # storage_use_azuread = true } +provider "azurerm" { + alias = "planetary_computer_subscription" + subscription_id = "9da7523a-cb61-4c3e-b1d4-afa5fc6d2da9" + features {} +} + terraform { required_version = ">= 0.13" diff --git a/deployment/terraform/resources/variables.tf b/deployment/terraform/resources/variables.tf index c760a708..e83b5f57 100644 --- a/deployment/terraform/resources/variables.tf +++ b/deployment/terraform/resources/variables.tf @@ -11,6 +11,11 @@ variable "pc_test_resources_rg" { default = "pc-test-manual-resources" } +variable "pc_resources_rg" { + type = string + default = "pc-manual-resources" +} + variable "pc_test_resources_kv" { type = string default = "pc-test-deploy-secrets" @@ -123,6 +128,22 @@ variable "image_output_storage_url" { type = string } +variable "prod_log_analytics_workspace_name" { + type = string +} + +variable "prod_log_analytics_workspace_id" { + type = string +} + +variable "banned_ip_table" { + type = string +} + +variable "func_storage_account_url" { + type = string +} + # ----------------- # Local variables diff --git a/deployment/terraform/staging/main.tf b/deployment/terraform/staging/main.tf index 75567650..b9f3ce92 100644 --- a/deployment/terraform/staging/main.tf +++ b/deployment/terraform/staging/main.tf @@ -22,6 +22,10 @@ module "resources" { animation_output_storage_url = "https://pcfilestest.blob.core.windows.net/output/animations" image_output_storage_url = "https://pcfilestest.blob.core.windows.net/output/images" + prod_log_analytics_workspace_name = "pc-api-loganalytics" + prod_log_analytics_workspace_id = "78d48390-b6bb-49a9-b7fd-a86f6522e9c4" + func_storage_account_url = "https://pctapisstagingsa.table.core.windows.net/" + banned_ip_table = "blobstoragebannedip" } terraform { diff --git a/docker-compose.yml b/docker-compose.yml index 98f3b1c4..71e4d413 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -53,6 +53,7 @@ services: - ./pccommon:/home/site/pccommon - ./pcfuncs:/home/site/wwwroot - .:/opt/src + - ~/.azure:/home/.azure nginx: image: pc-apis-nginx diff --git a/pc-funcs.dev.env b/pc-funcs.dev.env index 87bf075c..63b5f6ee 100644 --- a/pc-funcs.dev.env +++ b/pc-funcs.dev.env @@ -10,3 +10,7 @@ IMAGE_OUTPUT_STORAGE_URL="http://azurite:10000/devstoreaccount1/output/images" IMAGE_OUTPUT_ACCOUNT_KEY="Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" IMAGE_API_ROOT_URL="https://planetarycomputer-staging.microsoft.com/api/data/v1" IMAGE_TILE_REQUEST_CONCURRENCY=2 + +STORAGE_ACCOUNT_URL=https://pctapisstagingsa.table.core.windows.net/ +BANNED_IP_TABLE=blobstoragebannedip +LOG_ANALYTICS_WORKSPACE_ID=78d48390-b6bb-49a9-b7fd-a86f6522e9c4 \ No newline at end of file diff --git a/pcfuncs/Dockerfile b/pcfuncs/Dockerfile index 3beb133f..323859d3 100644 --- a/pcfuncs/Dockerfile +++ b/pcfuncs/Dockerfile @@ -2,6 +2,7 @@ FROM mcr.microsoft.com/azure-functions/python:4-python3.8 # git required for pip installs from git RUN apt update && apt install -y git +RUN curl -sL https://aka.ms/InstallAzureCLIDeb | bash ENV AzureWebJobsScriptRoot=/home/site/wwwroot \ AzureFunctionsJobHost__Logging__Console__IsEnabled=true diff --git a/pcfuncs/ipban/__init__.py b/pcfuncs/ipban/__init__.py new file mode 100644 index 00000000..72f0e645 --- /dev/null +++ b/pcfuncs/ipban/__init__.py @@ -0,0 +1,29 @@ +import datetime +import logging + +import azure.functions as func +from azure.data.tables import TableServiceClient +from azure.identity import DefaultAzureCredential +from azure.monitor.query import LogsQueryClient + +from .config import settings +from .models import UpdateBannedIPTask + +logger = logging.getLogger(__name__) + + +def main(mytimer: func.TimerRequest) -> None: + utc_timestamp: str = ( + datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc).isoformat() + ) + logger.info("Updating the ip ban list at %s", utc_timestamp) + credential: DefaultAzureCredential = DefaultAzureCredential() + with LogsQueryClient(credential) as logs_query_client: + with TableServiceClient( + endpoint=settings.storage_account_url, credential=credential + ) as table_service_client: + with table_service_client.create_table_if_not_exists( + settings.banned_ip_table + ) as table_client: + task = UpdateBannedIPTask(logs_query_client, table_client) + task.run() diff --git a/pcfuncs/ipban/config.py b/pcfuncs/ipban/config.py new file mode 100644 index 00000000..4261461d --- /dev/null +++ b/pcfuncs/ipban/config.py @@ -0,0 +1,18 @@ +# config.py +from pydantic import BaseSettings, Field + + +class Settings(BaseSettings): + storage_account_url: str = Field(env="STORAGE_ACCOUNT_URL") + banned_ip_table: str = Field(env="BANNED_IP_TABLE") + log_analytics_workspace_id: str = Field(env="LOG_ANALYTICS_WORKSPACE_ID") + + # Time and threshold settings + time_window_in_hours: int = Field(default=24, env="TIME_WINDOW_IN_HOURS") + threshold_read_count_in_gb: int = Field( + default=5120, env="THRESHOLD_READ_COUNT_IN_GB" + ) + + +# Create a global settings instance +settings = Settings() # type: ignore diff --git a/pcfuncs/ipban/function.json b/pcfuncs/ipban/function.json new file mode 100644 index 00000000..2b55fa8e --- /dev/null +++ b/pcfuncs/ipban/function.json @@ -0,0 +1,11 @@ +{ + "scriptFile": "__init__.py", + "bindings": [ + { + "name": "mytimer", + "type": "timerTrigger", + "direction": "in", + "schedule": "0 */1 * * *" + } + ] +} diff --git a/pcfuncs/ipban/models.py b/pcfuncs/ipban/models.py new file mode 100644 index 00000000..71ec5021 --- /dev/null +++ b/pcfuncs/ipban/models.py @@ -0,0 +1,70 @@ +import logging +from typing import Any, List, Set + +from azure.data.tables import TableClient, UpdateMode +from azure.monitor.query import LogsQueryClient +from azure.monitor.query._models import LogsTableRow + +from .config import settings + + +class UpdateBannedIPTask: + def __init__( + self, + logs_query_client: LogsQueryClient, + table_client: TableClient, + ) -> None: + self.log_query_client = logs_query_client + self.table_client = table_client + + def run(self) -> List[LogsTableRow]: + query_result: List[LogsTableRow] = self.get_blob_logs_query_result() + logging.info(f"Kusto query result: {query_result}") + self.update_banned_ips(query_result) + return query_result + + def get_blob_logs_query_result(self) -> List[LogsTableRow]: + query: str = f""" + StorageBlobLogs + | where TimeGenerated > ago({settings.time_window_in_hours}h) + | extend IpAddress = tostring(split(CallerIpAddress, ":")[0]) + | where OperationName == 'GetBlob' + | where not(ipv4_is_private(IpAddress)) + | summarize readcount = sum(ResponseBodySize) / (1024 * 1024 * 1024) + by IpAddress + | where readcount > {settings.threshold_read_count_in_gb} + """ + response: Any = self.log_query_client.query_workspace( + settings.log_analytics_workspace_id, query, timespan=None + ) + return response.tables[0].rows + + def update_banned_ips(self, query_result: List[LogsTableRow]) -> None: + existing_ips = { + entity["RowKey"] for entity in self.table_client.list_entities() + } + result_ips: Set[str] = set() + for result in query_result: + ip_address: str = result[0] + read_count: int = int(result[1]) + result_ips.add(ip_address) + entity = { + "PartitionKey": ip_address, + "RowKey": ip_address, + "ReadCount": read_count, + "Threshold": settings.threshold_read_count_in_gb, + "TimeWindow": settings.time_window_in_hours, + } + + if ip_address in existing_ips: + self.table_client.update_entity(entity, mode=UpdateMode.REPLACE) + else: + self.table_client.create_entity(entity) + + for ip_address in existing_ips: + if ip_address not in result_ips: + self.table_client.delete_entity( + partition_key=ip_address, row_key=ip_address + ) + + logging.info("IP ban list has been updated successfully") diff --git a/pcfuncs/requirements-deploy.txt b/pcfuncs/requirements-deploy.txt index 3c95a933..38dfa8a3 100644 --- a/pcfuncs/requirements-deploy.txt +++ b/pcfuncs/requirements-deploy.txt @@ -14,7 +14,8 @@ pillow==10.3.0 pyproj==3.3.1 pydantic>=1.9,<2.0.0 rasterio==1.3.* - +azure-monitor-query==1.3.0 +pytest-mock==3.14.0 # The deploy process needs symlinks to bring in # pctasks libraries. Symlink is created in deploy script ./pccommon_linked diff --git a/pcfuncs/requirements.txt b/pcfuncs/requirements.txt index b1cf10b3..fdaab225 100644 --- a/pcfuncs/requirements.txt +++ b/pcfuncs/requirements.txt @@ -14,7 +14,8 @@ pillow==10.3.0 pyproj==3.3.1 pydantic>=1.9,<2.0.0 rasterio==1.3.* - +azure-monitor-query==1.3.0 +pytest-mock==3.14.0 # Deployment needs to copy the local code into # the app code directory, so requires a separate # requirements file. diff --git a/pcfuncs/tests/conftest.py b/pcfuncs/tests/conftest.py new file mode 100644 index 00000000..398fc9b2 --- /dev/null +++ b/pcfuncs/tests/conftest.py @@ -0,0 +1,29 @@ +from typing import List + +import pytest + + +def pytest_addoption(parser: pytest.Parser) -> None: + parser.addoption( + "--no-integration", + action="store_true", + default=False, + help="don't run integration tests", + ) + + +def pytest_configure(config: pytest.Config) -> None: + config.addinivalue_line("markers", "integration: mark as an integration test") + + +def pytest_collection_modifyitems( + config: pytest.Config, items: List[pytest.Item] +) -> None: + if config.getoption("--no-integration"): + # --no-integration given in cli: skip integration tests + skip_integration = pytest.mark.skip( + reason="needs --no-integration option to run" + ) + for item in items: + if "integration" in item.keywords: + item.add_marker(skip_integration) diff --git a/pcfuncs/tests/ipban/__init__.py b/pcfuncs/tests/ipban/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/pcfuncs/tests/ipban/test_ipban.py b/pcfuncs/tests/ipban/test_ipban.py new file mode 100644 index 00000000..749c4fd7 --- /dev/null +++ b/pcfuncs/tests/ipban/test_ipban.py @@ -0,0 +1,136 @@ +import logging +import uuid +from typing import Any, Generator, List, Tuple +from unittest.mock import MagicMock + +import pytest +from azure.data.tables import TableClient, TableServiceClient +from azure.data.tables._entity import TableEntity +from azure.identity import DefaultAzureCredential +from azure.monitor.query import LogsQueryClient +from azure.monitor.query._models import LogsTableRow +from ipban.config import settings +from ipban.models import UpdateBannedIPTask +from pytest_mock import MockerFixture + +MOCK_LOGS_QUERY_RESULT = [("192.168.1.1", 8000), ("192.168.1.4", 12000)] +TEST_ID = str(uuid.uuid4()).replace("-", "") # dash is not allowed in table name +TEST_BANNED_IP_TABLE = f"testblobstoragebannedip{TEST_ID}" + +logger = logging.getLogger(__name__) +PREPOPULATED_ENTITIES = [ + { + "PartitionKey": "192.168.1.1", + "RowKey": "192.168.1.1", + "ReadCount": 647, + "Threshold": settings.threshold_read_count_in_gb, + "TimeWindow": settings.time_window_in_hours, + }, + { + "PartitionKey": "192.168.1.2", + "RowKey": "192.168.1.2", + "ReadCount": 214, + "Threshold": settings.threshold_read_count_in_gb, + "TimeWindow": settings.time_window_in_hours, + }, + { + "PartitionKey": "192.168.1.3", + "RowKey": "192.168.1.3", + "ReadCount": 550, + "Threshold": settings.threshold_read_count_in_gb, + "TimeWindow": settings.time_window_in_hours, + }, +] + + +def populate_banned_ip_table(table_client: TableClient) -> None: + for entity in PREPOPULATED_ENTITIES: + table_client.create_entity(entity) + + +@pytest.fixture +def mock_clients( + mocker: MockerFixture, +) -> Generator[Tuple[MagicMock, TableClient], Any, None]: + mock_response: MagicMock = mocker.MagicMock() + mock_response.tables[0].rows = MOCK_LOGS_QUERY_RESULT + logs_query_client: MagicMock = mocker.MagicMock() + logs_query_client.query_workspace.return_value = mock_response + CONNECTION_STRING: str = ( + "DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;" + "AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsu" + "Fq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;" + "TableEndpoint=http://azurite:10002/devstoreaccount1;" + ) + + # Use Azurite for unit tests and populate the table with initial data + with TableServiceClient.from_connection_string( + CONNECTION_STRING + ) as table_service_client: + with table_service_client.create_table_if_not_exists( + table_name=TEST_BANNED_IP_TABLE + ) as table_client: + + # Pre-populate the banned IP table + populate_banned_ip_table(table_client) + + # Yield the clients for use + yield logs_query_client, table_client + + # Delete the test table + table_service_client.delete_table(TEST_BANNED_IP_TABLE) + + +@pytest.fixture +def integration_clients( + mocker: MockerFixture, +) -> Generator[Tuple[LogsQueryClient, TableClient], Any, None]: + credential: DefaultAzureCredential = DefaultAzureCredential() + with LogsQueryClient(credential) as logs_query_client: + with TableServiceClient( + endpoint=settings.storage_account_url, credential=credential + ) as table_service_client: + with table_service_client.create_table_if_not_exists( + TEST_BANNED_IP_TABLE + ) as table_client: + + # Pre-populate the banned IP table + populate_banned_ip_table(table_client) + + # Yield the clients for use + yield logs_query_client, table_client + + # Delete the test table + table_service_client.delete_table(TEST_BANNED_IP_TABLE) + + +@pytest.mark.integration +def test_update_banned_ip_integration( + integration_clients: Tuple[LogsQueryClient, TableClient] +) -> None: + logger.info(f"Test id: {TEST_ID} - integration test is running") + logs_query_client, table_client = integration_clients + assert len(list(table_client.list_entities())) == len(PREPOPULATED_ENTITIES) + task: UpdateBannedIPTask = UpdateBannedIPTask(logs_query_client, table_client) + # retrieve the logs query result from pc-api-loganalytics + logs_query_result: List[LogsTableRow] = task.run() + assert len(list(table_client.list_entities())) == len(logs_query_result) + for ip, expected_read_count in logs_query_result: + entity: TableEntity = table_client.get_entity(ip, ip) + assert entity["ReadCount"] == expected_read_count + assert entity["Threshold"] == settings.threshold_read_count_in_gb + assert entity["TimeWindow"] == settings.time_window_in_hours + + +def test_update_banned_ip(mock_clients: Tuple[MagicMock, TableClient]) -> None: + logger.info(f"Test id: {TEST_ID} - unit test is running") + mock_logs_query_client, table_client = mock_clients + assert len(list(table_client.list_entities())) == len(PREPOPULATED_ENTITIES) + task: UpdateBannedIPTask = UpdateBannedIPTask(mock_logs_query_client, table_client) + task.run() + assert len(list(table_client.list_entities())) == len(MOCK_LOGS_QUERY_RESULT) + for ip, expected_read_count in MOCK_LOGS_QUERY_RESULT: + entity = table_client.get_entity(ip, ip) + assert entity["ReadCount"] == expected_read_count + assert entity["Threshold"] == settings.threshold_read_count_in_gb + assert entity["TimeWindow"] == settings.time_window_in_hours diff --git a/scripts/bin/test-funcs b/scripts/bin/test-funcs index 22b1d2db..64bb06eb 100755 --- a/scripts/bin/test-funcs +++ b/scripts/bin/test-funcs @@ -15,6 +15,23 @@ This scripts is meant to be run inside the funcs container. " } +while [[ $# -gt 0 ]]; do case $1 in + --no-integration) + INTEGRATION="--no-integration" + shift + ;; + --help) + usage + exit 0 + shift + ;; + *) + usage "Unknown parameter passed: $1" + shift + shift + ;; + esac done + if [ "${BASH_SOURCE[0]}" = "${0}" ]; then echo "Running mypy for funcs..." @@ -27,6 +44,6 @@ if [ "${BASH_SOURCE[0]}" = "${0}" ]; then flake8 pcfuncs echo "Running unit tests for funcs..." - python -m pytest pcfuncs/tests + python -m pytest pcfuncs/tests ${INTEGRATION:+$INTEGRATION} fi diff --git a/scripts/test b/scripts/test index 35685dff..8f283525 100755 --- a/scripts/test +++ b/scripts/test @@ -94,7 +94,7 @@ if [ "${BASH_SOURCE[0]}" = "${0}" ]; then docker-compose \ -f docker-compose.yml \ run --rm \ - funcs /bin/bash -c "cd /opt/src && scripts/bin/test-funcs" + funcs /bin/bash -c "cd /opt/src && scripts/bin/test-funcs ${NO_INTEGRATION}" fi fi