wazuh · AlexRuiz7 · Aug 5, 2024 · Jul 8, 2024 · Jul 8, 2024 · Jul 8, 2024
diff --git a/integrations/.gitignore b/integrations/.gitignore
@@ -1,2 +1,3 @@
 external
-docker/certs
+docker/certs
+docker/config
diff --git a/integrations/README.md b/integrations/README.md
@@ -14,14 +14,13 @@ and combines security data from AWS and a broad range of enterprise security dat
 
 Refer to these documents for more information about this integration:
 
-* [User Guide](./amazon-security-lake/README.md).
-* [Developer Guide](./amazon-security-lake/CONTRIBUTING.md).
-
+- [User Guide](./amazon-security-lake/README.md).
+- [Developer Guide](./amazon-security-lake/CONTRIBUTING.md).
 
 ### Other integrations
 
 We host development environments to support the following integrations:
 
-* [Splunk](./splunk/README.md).
-* [Elasticsearch](./elastic/README.md).
-* [OpenSearch](./opensearch/README.md).
+- [Splunk](./splunk/README.md).
+- [Elasticsearch](./elastic/README.md).
+- [OpenSearch](./opensearch/README.md).
diff --git a/integrations/amazon-security-lake/CONTRIBUTING.md b/integrations/amazon-security-lake/CONTRIBUTING.md
@@ -16,21 +16,18 @@ This Docker Compose project will bring up these services:
 - our [events generator](../tools/events-generator/README.md)
 - an AWS Lambda Python container.
 
-On the one hand, the event generator will push events constantly to the indexer, to the `wazuh-alerts-4.x-sample` index by default (refer to the [events generator](../tools/events-generator/README.md) documentation for customization options). On the other hand, Logstash will query for new data and deliver it to output configured in the pipeline, which can be one of `indexer-to-s3` or `indexer-to-file`.
+On the one hand, the event generator will constantly push events to the indexer, to the `wazuh-alerts-4.x-sample` index by default (refer to the [events generator](../tools/events-generator/README.md) documentation for customization options). On the other hand, Logstash will query for new data and deliver it to the output configured in the `indexer-to-s3` pipeline. This pipeline delivers the data to an S3 bucket, from which the data is processed using a Lambda function, to finally be sent to the Amazon Security Lake bucket in Parquet format.
 
-The `indexer-to-s3` pipeline is the method used by the integration. This pipeline delivers the data to an S3 bucket, from which the data is processed using a Lambda function, to finally be sent to the Amazon Security Lake bucket in Parquet format.
-
-
-Attach a terminal to the container and start the integration by starting Logstash, as follows:
+The pipeline starts automatically, but if you need to start it manually, attach a terminal to the Logstash container and start the integration using the command below:
 
 ```console
-/usr/share/logstash/bin/logstash -f /usr/share/logstash/pipeline/indexer-to-s3.conf --path.settings /etc/logstash
+/usr/share/logstash/bin/logstash -f /usr/share/logstash/pipeline/indexer-to-s3.conf
 ```
 
 After 5 minutes, the first batch of data will show up in http://localhost:9444/ui/wazuh-aws-security-lake-raw. You'll need to invoke the Lambda function manually, selecting the log file to process.
 
 ```bash
-bash amazon-security-lake/src/invoke-lambda.sh <file>
+bash amazon-security-lake/invoke-lambda.sh <file>
 ```
 
 Processed data will be uploaded to http://localhost:9444/ui/wazuh-aws-security-lake-parquet. Click on any file to download it, and check its content using `parquet-tools`. Make sure to install the virtual environment first, through [requirements.txt](./requirements.txt).
@@ -56,4 +53,3 @@ See [README.md](README.md). The instructions on that section have been based on
 **Docker is required**.
 
 The [Makefile](./Makefile) in this folder automates the generation of a zip deployment package containing the source code and the required dependencies for the AWS Lambda function. Simply run `make` and it will generate the `wazuh_to_amazon_security_lake.zip` file. The main target runs a Docker container to install the Python3 dependencies locally, and zips the source code and the dependencies together.
-
diff --git a/integrations/amazon-security-lake/Dockerfile b/integrations/amazon-security-lake/Dockerfile
@@ -1,46 +1,17 @@
-# MULTI-STAGE build
+# docker build --platform linux/amd64 --no-cache -f Dockerfile -t docker-image:test .
+# docker run --platform linux/amd64 -p 9000:8080 docker-image:test
 
-FROM python:3.9 as builder
-# Create a virtualenv for dependencies. This isolates these packages from
-# system-level packages.
-RUN python3 -m venv /env
-# Setting these environment variables are the same as running
-# source /env/bin/activate.
-ENV VIRTUAL_ENV /env
-ENV PATH /env/bin:$PATH
-# Copy the application's requirements.txt and run pip to install all
-# dependencies into the virtualenv.
-COPY requirements.txt /app/requirements.txt
-RUN pip install -r /app/requirements.txt
+# FROM public.ecr.aws/lambda/python:3.9
+FROM amazon/aws-lambda-python:3.12
 
+# Copy requirements.aws.txt
+COPY requirements.aws.txt ${LAMBDA_TASK_ROOT}
 
-FROM python:3.9
-ENV LOGSTASH_KEYSTORE_PASS="SecretPassword"
-# Add the application source code.
-COPY --chown=logstash:logstash ./src /home/app
-# Add execution persmissions.
-RUN chmod a+x /home/app/lambda_function.py
-# Copy the application's dependencies.
-COPY --from=builder /env /env
+# Install the specified packages (--no-cache-dir keeps pip's download cache out of the image layer)
+RUN pip install --no-cache-dir -r requirements.aws.txt
 
-# Install Logstash
-RUN apt-get update && apt-get install -y iputils-ping wget gpg apt-transport-https
-RUN wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | gpg --dearmor -o /usr/share/keyrings/elastic-keyring.gpg && \
-    echo "deb [signed-by=/usr/share/keyrings/elastic-keyring.gpg] https://artifacts.elastic.co/packages/8.x/apt stable main" | tee -a /etc/apt/sources.list.d/elastic-8.x.list && \
-    apt-get update && apt install -y logstash
-# Install logstash-input-opensearch plugin.
-RUN /usr/share/logstash/bin/logstash-plugin install logstash-input-opensearch
-# Copy the Logstash's ingestion pipelines.
-COPY --chown=logstash:logstash logstash/pipeline /usr/share/logstash/pipeline
-# Grant logstash ownership over its files
-RUN chown --recursive logstash:logstash /usr/share/logstash /etc/logstash /var/log/logstash /var/lib/logstash
+# Copy function code
+COPY src ${LAMBDA_TASK_ROOT}
 
-USER logstash
-# Copy and run the setup.sh script to create and configure a keystore for Logstash.
-COPY --chown=logstash:logstash logstash/setup.sh /usr/share/logstash/bin/setup.sh
-RUN bash /usr/share/logstash/bin/setup.sh
-
-# Disable ECS compatibility
-RUN `echo "pipeline.ecs_compatibility: disabled" >> /etc/logstash/logstash.yml`
-
-WORKDIR /home/app
+# Set the CMD to your handler (could also be done as a parameter override outside of the Dockerfile)
+CMD [ "lambda_function.lambda_handler" ]
diff --git a/integrations/amazon-security-lake/README.md b/integrations/amazon-security-lake/README.md
@@ -90,7 +90,7 @@ Follow the [official documentation](https://docs.aws.amazon.com/lambda/latest/dg
 - Configure the runtime to have 512 MB of memory and 30 seconds timeout.
 - Configure a trigger so every object with `.txt` extension uploaded to the S3 bucket created previously invokes the Lambda.
   ![AWS Lambda trigger](./images/asl-lambda-trigger.jpeg)
--  Use the [Makefile](./Makefile) to generate the zip package `wazuh_to_amazon_security_lake.zip`, and upload it to the S3 bucket created previously as per [these instructions](https://docs.aws.amazon.com/lambda/latest/dg/gettingstarted-package.html#gettingstarted-package-zip). See [CONTRIBUTING](./CONTRIBUTING.md) for details about the Makefile.
+- Use the [Makefile](./Makefile) to generate the zip package `wazuh_to_amazon_security_lake.zip`, and upload it to the S3 bucket created previously as per [these instructions](https://docs.aws.amazon.com/lambda/latest/dg/gettingstarted-package.html#gettingstarted-package-zip). See [CONTRIBUTING](./CONTRIBUTING.md) for details about the Makefile.
 - Configure the Lambda with at least the required _Environment Variables_ below:
 
   | Environment variable | Required | Value                                                                                              |

diff --git a/integrations/amazon-security-lake/aws-lambda.dockerfile b/integrations/amazon-security-lake/aws-lambda.dockerfile
diff --git a/integrations/amazon-security-lake/invoke-lambda.sh b/integrations/amazon-security-lake/invoke-lambda.sh
@@ -39,4 +39,4 @@ curl -X POST "http://localhost:9000/2015-03-31/functions/function/invocations" -
       }
     }
   ]
-}'
+}'
diff --git a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf
diff --git a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-s3.conf b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-s3.conf
@@ -27,19 +27,27 @@ output {
    s3 {
       id => "output.s3"
       access_key_id => "${AWS_ACCESS_KEY_ID}"
-      secret_access_key => "${AWS_SECRET_ACCESS_KEY}"
-      region => "${AWS_REGION}"
-      endpoint => "${AWS_ENDPOINT}"
       bucket => "${S3_BUCKET_RAW}"
       codec => "json_lines"
-      retry_count => 0
-      validate_credentials_on_root_bucket => false
+      encoding => "gzip"
+      endpoint => "${AWS_ENDPOINT}"
       prefix => "%{+YYYY}%{+MM}%{+dd}"
+      region => "${AWS_REGION}"
+      retry_count => 0
+      secret_access_key => "${AWS_SECRET_ACCESS_KEY}"
       server_side_encryption => true
       server_side_encryption_algorithm => "AES256"
+      time_file => 5
+      validate_credentials_on_root_bucket => false
       additional_settings => {
          "force_path_style" => true
       }
-      time_file => 5
+   }
+   file {
+      id => "output.file"
+      path => "/usr/share/logstash/logs/indexer-to-file-%{+YYYY-MM-dd-HH}.log"
+      file_mode => 0644
+      codec => json_lines
+      flush_interval => 30
    }
 }
diff --git a/integrations/amazon-security-lake/logstash/setup.sh b/integrations/amazon-security-lake/logstash/setup.sh
diff --git a/integrations/amazon-security-lake/src/lambda_function.py b/integrations/amazon-security-lake/src/lambda_function.py
@@ -2,6 +2,7 @@
 import os
 import urllib.parse
 import json
+import gzip
 import boto3
 import pyarrow as pa
 import pyarrow.parquet as pq
@@ -31,7 +32,7 @@ def get_events(bucket: str, key: str) -> list:
     logger.info(f"Reading {key}.")
     try:
         response = s3_client.get_object(Bucket=bucket, Key=key)
-        data = response['Body'].read().decode('utf-8')
+        data = gzip.decompress(response['Body'].read()).decode('utf-8')
         return data.splitlines()
     except ClientError as e:
         logger.error(

diff --git a/integrations/docker/.env b/integrations/docker/.env
@@ -4,9 +4,6 @@ ELASTIC_PASSWORD=elastic
 # Password for the 'kibana_system' user (at least 6 characters)
 KIBANA_PASSWORD=elastic
 
-# Version of Elastic products
-STACK_VERSION=8.6.2
-
 # Set the cluster name
 CLUSTER_NAME=elastic
 
@@ -22,8 +19,26 @@ KIBANA_PORT=5602
 # Increase or decrease based on the available host memory (in bytes)
 MEM_LIMIT=1073741824
 
+# Wazuh version
+WAZUH_VERSION=4.8.1
+
+# Wazuh Indexer version (Provisionally using OpenSearch)
+WAZUH_INDEXER_VERSION=2.14.0
+
+# Wazuh Dashboard version (Provisionally using OpenSearch Dashboards)
+WAZUH_DASHBOARD_VERSION=2.14.0
+
+# Wazuh certs generator version
+WAZUH_CERTS_GENERATOR_VERSION=0.0.1
+
 # OpenSearch destination cluster version
 OS_VERSION=2.14.0
 
-# Wazuh version
-WAZUH_VERSION=4.7.5
+# Logstash version:
+LOGSTASH_OSS_VERSION=8.9.0
+
+# Splunk version:
+SPLUNK_VERSION=9.1.4
+
+# Version of Elastic products
+STACK_VERSION=8.14.3