Skip to content

Commit

Permalink
Check lambda object size
Browse files Browse the repository at this point in the history
  • Loading branch information
ericzbeard committed Jul 25, 2024
1 parent c94435f commit 5da2df0
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 1 deletion.
76 changes: 76 additions & 0 deletions internal/aws/s3/s3.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package s3

import (
"archive/zip"
"bytes"
"context"
"crypto/sha256"
Expand Down Expand Up @@ -246,6 +247,81 @@ func GetObject(bucketName string, key string) ([]byte, error) {
return body, nil
}

// GetUnzippedObjectSize gets the uncompressed length in bytes of an object.
// The object is downloaded in full and each zip entry is decompressed to
// count its bytes, so calling this on a large object will be slow!
func GetUnzippedObjectSize(bucketName string, key string) (int64, error) {
	result, err := getClient().GetObject(context.Background(),
		&s3.GetObjectInput{
			Bucket: &bucketName,
			Key:    &key,
		})
	if err != nil {
		return 0, err
	}
	// Close the response body so the connection can be reused.
	defer result.Body.Close()

	body, err := io.ReadAll(result.Body)
	if err != nil {
		return 0, err
	}

	// Unzip the archive and count the total bytes of all files
	zipReader, err := zip.NewReader(bytes.NewReader(body), int64(len(body)))
	if err != nil {
		// TODO: What if it's not a zip file? Maybe return something like -1?
		return 0, err
	}

	// Read all the files from the zip archive and count total size
	var size int64
	for _, zipFile := range zipReader.File {
		config.Debugf("Reading file from zip archive: %s", zipFile.Name)
		n, err := unzippedEntrySize(zipFile)
		if err != nil {
			return 0, err
		}
		size += n
	}

	config.Debugf("Total size for %s/%s is %d", bucketName, key, size)

	return size, nil
}

// unzippedEntrySize decompresses a single zip entry and returns its
// uncompressed size in bytes. It exists as a helper so that the entry's
// reader is closed as soon as the entry has been counted, rather than
// deferring all Close calls to the end of the archive walk.
func unzippedEntrySize(zipFile *zip.File) (int64, error) {
	f, err := zipFile.Open()
	if err != nil {
		config.Debugf("Error opening zip file %s: %v", zipFile.Name, err)
		return 0, err
	}
	defer f.Close()

	// io.Copy handles EOF for us and surfaces real read errors,
	// unlike the manual buffered read loop it replaces.
	n, err := io.Copy(io.Discard, f)
	if err != nil {
		config.Debugf("Error reading from zip file %s: %v", zipFile.Name, err)
		return 0, err
	}
	return n, nil
}

// S3ObjectInfo holds metadata about an S3 object, as reported by a
// HeadObject call (no object content is included).
type S3ObjectInfo struct {
	// SizeBytes is the stored (compressed, if the object is an archive)
	// size of the object in bytes.
	SizeBytes int64
}

// HeadObject gets information about an object without downloading it.
// Returns the object's metadata, or an error if the object does not
// exist or is not accessible.
func HeadObject(bucketName string, key string) (*S3ObjectInfo, error) {
	result, err := getClient().HeadObject(context.Background(),
		&s3.HeadObjectInput{
			Bucket: &bucketName,
			Key:    &key,
		})
	if err != nil {
		return nil, err
	}
	retval := &S3ObjectInfo{}
	// ContentLength is a *int64 in AWS SDK v2 and may be nil;
	// guard the dereference to avoid a panic and leave SizeBytes at 0.
	if result.ContentLength != nil {
		retval.SizeBytes = *result.ContentLength
	}
	return retval, nil
}

// PutObject puts an object into a bucket
func PutObject(bucketName string, key string, body []byte) error {
_, err := getClient().PutObject(context.Background(),
Expand Down
1 change: 1 addition & 0 deletions internal/cmd/forecast/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ These can be ignored with the `--ignore` argument.
| F0018 | SageMaker Notebook quota limit has not been reached |
| F0019 | Lambda S3Bucket exists |
| F0020 | Lambda S3Key exists |
| F0021 | Lambda zip file has a valid size |

## Estimates

Expand Down
36 changes: 35 additions & 1 deletion internal/cmd/forecast/lambda.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,11 +64,45 @@ func checkLambdaS3Bucket(input *PredictionInput, forecast *Forecast) {
forecast.Add(F0019, true, "S3 bucket exists")

// If the bucket exists, check to see if the object exists
obj, err := s3.GetObject(s3Bucket.Value, s3Key.Value)
obj, err := s3.HeadObject(s3Bucket.Value, s3Key.Value)

if err != nil || obj == nil {
forecast.Add(F0020, false, "S3 object does not exist")
} else {
forecast.Add(F0020, true, "S3 object exists")

config.Debugf("S3 Object %s/%s SizeBytes: %v",
s3Bucket.Value, s3Key.Value, obj.SizeBytes)

// Make sure it's less than 50MB and greater than 0.
// We don't download and unzip large objects to check their total
// uncompressed size, since that would take too long; only suspiciously
// small objects (under 256 bytes, below) are downloaded and inspected.
var max int64 = 50 * 1024 * 1024
if obj.SizeBytes > 0 && obj.SizeBytes <= max {

if obj.SizeBytes < 256 {
// This is suspiciously small. Download it and decompress
// to see if it's a zero byte file. A simple "Hello" python
// handler will zip down to 207b but an empty file has a
// similar zip size, so we can't know from the zip size itself.
unzippedSize, err := s3.GetUnzippedObjectSize(s3Bucket.Value, s3Key.Value)
if err != nil {
config.Debugf("Unable to unzip object: %v", err)
} else if unzippedSize == 0 {
forecast.Add(F0021, false, "S3 object has a zero byte unzipped size")
} else {
forecast.Add(F0021, true, "S3 object has a non-zero unzipped size")
}
} else {
forecast.Add(F0021, true, "S3 object has a non-zero length less than 50Mb")
}
} else {
if obj.SizeBytes == 0 {
forecast.Add(F0021, false, "S3 object has zero bytes")
} else {
forecast.Add(F0021, false, "S3 object is greater than 50Mb")
}
}
}
}

Expand Down
25 changes: 25 additions & 0 deletions test/templates/forecast/lambda-fail.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,3 +63,28 @@ Resources:
Runtime: nodejs20.x
Timeout: 30
MemorySize: 128

LambdaFunction5:
Type: AWS::Lambda::Function
Properties:
FunctionName: ZeroByteObject
Handler: index.handler
Code:
S3Bucket: ezbeard-rain-notempty
S3Key: zero.zip
Runtime: nodejs20.x
Timeout: 30
MemorySize: 128

LambdaFunction6:
Type: AWS::Lambda::Function
Properties:
FunctionName: ObjectTooLarge
Handler: index.handler
Code:
S3Bucket: ezbeard-rain-notempty
S3Key: cdk.zip
Runtime: nodejs20.x
Timeout: 30
MemorySize: 128

12 changes: 12 additions & 0 deletions test/templates/forecast/lambda-succeed.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,15 @@ Resources:
Runtime: nodejs20.x
Timeout: 30
MemorySize: 128

LambdaFunction3:
Type: AWS::Lambda::Function
Properties:
FunctionName: SmallObjectNonZero
Handler: index.handler
Code:
S3Bucket: ezbeard-rain-notempty
S3Key: small.zip
Runtime: nodejs20.x
Timeout: 30
MemorySize: 128

0 comments on commit 5da2df0

Please sign in to comment.