Skip to content
This repository has been archived by the owner on Nov 15, 2024. It is now read-only.

Commit

Permalink
Added MD5 sum verification option
Browse files Browse the repository at this point in the history
  • Loading branch information
kra-ts committed Jan 27, 2022
1 parent d4805a6 commit 6cd800e
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 14 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ optional arguments:
--aws-region AWS_REGION
The AWS region (should match SQS queue region)
--debug We do the debug?
--checksum If set, the MD5 sum is verified
--tmpdir TMPDIR The temp directory where the work will be done
```

Expand Down
53 changes: 39 additions & 14 deletions fdr2humio.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import sys
import tempfile
import urllib.parse
from hashlib import md5


import boto3
import botocore
Expand Down Expand Up @@ -149,6 +151,13 @@ def setup_args():
# Are we going to do the debug?
parser.add_argument("--debug", action="store_true", help="We do the debug?")

# MD5 validation of downloaded files (will still verify file size)
parser.add_argument(
"--checksum",
action="store_true",
help="If set, the MD5 sum is verified",
)

# Where can we do our workings
parser.add_argument(
"--tmpdir",
Expand Down Expand Up @@ -221,25 +230,41 @@ def post_files_to_humio(args, payload, s3, http):
# Download the source file from S3
s3.download_file(args["bucket"], asset["path"], local_file_path)

# TODO: Check the checksum

# TODO: check the size!
processed["files"] += 1
processed["bytes"] += os.path.getsize(local_file_path)
# TODO: check if space available on disk!

# POST to Humio HEC Raw w/ compression
with open(local_file_path, "rb") as f:
r = http.request(
"POST",
humio_url(args),
body=f.read(),
headers=humio_headers(args),
)

# TODO: Better error handling needed here as we may partially process a message
if r.status != 200:
data = f.read()

# TODO: Better error handling when MD5 have mismatch
if args["checksum"]:
local_file_md5 = md5(data).hexdigest()
if local_file_md5 != asset["checksum"]:
logging.debug(
f"MD5 checksum ({local_file_md5}) of file \"{asset['path']}\" "
f'matches with file on-disk "{local_file_path}".'
)
else:
logging.error(
f"MD5 mismatch {asset['checksum']} ({asset['path']}) doesn't match local file "
f"MD5 {local_file_md5} ({local_file_path})."
)
return False

r = http.request(
"POST",
humio_url(args),
body=data,
headers=humio_headers(args),
)

processed["files"] += 1
processed["bytes"] += os.path.getsize(local_file_path)

# TODO: Better error handling needed here as we may partially process a message
if r.status != 200:
return False

# Everything sent as expected
return processed

Expand Down

0 comments on commit 6cd800e

Please sign in to comment.