From c1cd587fe8a9e97f7322588276cecb96e3c5e63a Mon Sep 17 00:00:00 2001 From: Victoria McDonald Date: Tue, 11 Jul 2023 22:04:19 -0700 Subject: [PATCH] Add AWS Lambda tutorial for ESIP summer meeting --- notebooks/aws_lambda_sst/.dockerignore | 1 + notebooks/aws_lambda_sst/.gitignore | 5 + notebooks/aws_lambda_sst/Dockerfile | 18 + notebooks/aws_lambda_sst/LICENSE | 201 ++ notebooks/aws_lambda_sst/README.md | 45 + ...podaac-lambda-invoke-sst-global-mean.ipynb | 815 ++++++ notebooks/aws_lambda_sst/requirements.txt | 30 + .../sst-global-mean-exploratory.ipynb | 2302 +++++++++++++++++ notebooks/aws_lambda_sst/sst.py | 176 ++ .../terraform/.terraform.lock.hcl | 26 + notebooks/aws_lambda_sst/terraform/main.tf | 36 + .../aws_lambda_sst/terraform/sst-lambda.tf | 54 + .../aws_lambda_sst/terraform/terraform.tfvars | 6 + .../aws_lambda_sst/terraform/variables.tf | 46 + 14 files changed, 3761 insertions(+) create mode 100644 notebooks/aws_lambda_sst/.dockerignore create mode 100644 notebooks/aws_lambda_sst/.gitignore create mode 100644 notebooks/aws_lambda_sst/Dockerfile create mode 100644 notebooks/aws_lambda_sst/LICENSE create mode 100644 notebooks/aws_lambda_sst/README.md create mode 100644 notebooks/aws_lambda_sst/podaac-lambda-invoke-sst-global-mean.ipynb create mode 100644 notebooks/aws_lambda_sst/requirements.txt create mode 100644 notebooks/aws_lambda_sst/sst-global-mean-exploratory.ipynb create mode 100644 notebooks/aws_lambda_sst/sst.py create mode 100644 notebooks/aws_lambda_sst/terraform/.terraform.lock.hcl create mode 100644 notebooks/aws_lambda_sst/terraform/main.tf create mode 100644 notebooks/aws_lambda_sst/terraform/sst-lambda.tf create mode 100644 notebooks/aws_lambda_sst/terraform/terraform.tfvars create mode 100644 notebooks/aws_lambda_sst/terraform/variables.tf diff --git a/notebooks/aws_lambda_sst/.dockerignore b/notebooks/aws_lambda_sst/.dockerignore new file mode 100644 index 00000000..56fe0d68 --- /dev/null +++ b/notebooks/aws_lambda_sst/.dockerignore 
@@ -0,0 +1 @@ +terraform \ No newline at end of file diff --git a/notebooks/aws_lambda_sst/.gitignore b/notebooks/aws_lambda_sst/.gitignore new file mode 100644 index 00000000..3e44bff0 --- /dev/null +++ b/notebooks/aws_lambda_sst/.gitignore @@ -0,0 +1,5 @@ +__pycache__ + +terraform/.terraform/ +terraform.tfstate* +terraform/tfplan \ No newline at end of file diff --git a/notebooks/aws_lambda_sst/Dockerfile b/notebooks/aws_lambda_sst/Dockerfile new file mode 100644 index 00000000..28f038c0 --- /dev/null +++ b/notebooks/aws_lambda_sst/Dockerfile @@ -0,0 +1,18 @@ +# Stage 0 - Create from the AWS Lambda Python 3.9 base image +FROM amazon/aws-lambda-python:3.9 +RUN yum update -y && yum install -y tcsh + +# Stage 1 - Install dependencies +# FROM stage0 as stage1 +COPY requirements.txt . +RUN pip3 install -r requirements.txt --target "${LAMBDA_TASK_ROOT}" + +# Stage 2 - Copy Lambda function code +# FROM stage1 as stage2 +COPY . ${LAMBDA_TASK_ROOT} + +# Stage 3 - Execute code +# FROM stage2 as stage3 +LABEL version="0.1" \ + description="Containerized Lambda: SST" +CMD [ "sst.lambda_handler" ] \ No newline at end of file diff --git a/notebooks/aws_lambda_sst/LICENSE b/notebooks/aws_lambda_sst/LICENSE new file mode 100644 index 00000000..261eeb9e --- /dev/null +++ b/notebooks/aws_lambda_sst/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/notebooks/aws_lambda_sst/README.md b/notebooks/aws_lambda_sst/README.md new file mode 100644 index 00000000..bb59e529 --- /dev/null +++ b/notebooks/aws_lambda_sst/README.md @@ -0,0 +1,45 @@ +# AWS Lambda example for SST calculations + +This program uses AWS Lambda to calculate a global mean on the MUR 25km dataset. + +## Files + +sst.py - the code to be run by AWS Lambda. This gets packaged in a Docker container to be deployed to AWS. + +sst-global-mean-exploratory.ipynb - a notebook that explores the MUR25 dataset and runs through the global mean calculation offline. Can be run locally outside of AWS to trial run the code deployed to Lambda in sst.py + +podaac-lambda-invoke-sst-global-mean.ipynb - the main notebook to invoke the Lambda code. Finds the files in Earthdata cloud, invokes Lambda on each, and plots the results as a timeseries. + +Dockerfile - the instructions Docker uses to build the container image that is deployed to AWS Lambda + +requirements.txt - the required Python packages to install in the Docker image. These may be different than the packages required to run the notebooks + +terraform - terraform deploys AWS infrastructure. 
This folder contains the terraform configuration files + > terraform.tfvars + > main.tf + > sst-lambda.tf + > variables.tf + +## AWS Infrastructure + +This program includes the following AWS services: + +- Lambda function to execute science code, deployed via Docker container. +- AWS IAM role for Lambda function to execute as +- AWS Parameter Store to manage Earthdata login credentials +- S3 bucket to store the output of the Lambda function + +## Deploy AWS Resources with Terraform + +Deploys AWS infrastructure and stores state in an S3 backend. + +To deploy: + +1. Edit `terraform.tfvars` for environment to deploy to. +2. Initialize terraform: `terraform init` +3. Plan terraform modifications: `terraform plan -out=tfplan` +4. Apply terraform modifications: `terraform apply tfplan` + +## Run the notebook to invoke the Lambda function + +Run aws_lambda_sst/podaac-lambda-invoke-sst-global-mean.ipynb. diff --git a/notebooks/aws_lambda_sst/podaac-lambda-invoke-sst-global-mean.ipynb b/notebooks/aws_lambda_sst/podaac-lambda-invoke-sst-global-mean.ipynb new file mode 100644 index 00000000..4d2407fc --- /dev/null +++ b/notebooks/aws_lambda_sst/podaac-lambda-invoke-sst-global-mean.ipynb @@ -0,0 +1,815 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Scale Scientific Analysis in the Cloud with AWS Lambda\n", + "\n", + "### ***IMPORTANT (July 2023):** This tutorial is in development and is still undergoing testing. Running code in AWS Lambda will incur charges in your AWS account. Cost scoping and benchmarking is not yet complete. Please proceed with caution.\n", + "\n", + "This tutorial demonstrates how to plot a timeseries of global mean sea surface temperature values using AWS Lambda to perform the global mean computation. We use the MUR 25km dataset. \n", + "\n", + "This is one example of how to take advantage of AWS Cloud Computing capabilities for scientific research. 
Note that using AWS Compute services will incur costs that will be charged to your AWS account. As we complete testing we will include estimates of the compute cost associated with this tutorial. Note that expanding the analysis to a longer time period or different dataset will affect the compute costs charged to your AWS account. " + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Prerequisite Steps: Set up AWS infrastructure\n", + "\n", + "This tutorial takes advantage of numerous AWS Services including Lambda, Parameter Store, Elastic Compute Cloud (EC2), Elastic Container Registry (ECR), and Simple Storage Service (S3). Full documentation for setting up these services to run this tutorial is still being developed and will be linked here when complete. \n", + "\n", + "#### Deploy Lambda function using Terraform\n", + "\n", + "AWS Lambda is a compute service that runs code in response to events. The Lambda code is packaged in a Docker image, and we use Terraform to handle setting up the AWS services including the Lambda function.\n", + "\n", + "The /terraform/ directory contains the terraform configuration files. Edit terraform.tfvars with the names you want to give the AWS Elastic Container Registry (ecr_repo), the AWS Lambda role (lambda_role), the prefix to use for the Earthdata login parameters, and the AWS profile name you use for your account (optional). 
For example:\n", + "\n", + " ecr_repo = \"podaac-sst\"\n", + " lambda_role = \"podaac-sst-lambda-role\"\n", + " prefix = \"podaac\"\n", + " profile = \"saml-pub\"\n", + "\n", + "Run the following command to initialize the terraform configuration files:\n", + " \n", + " terraform init\n", + "\n", + "\n", + "Run terraform plan to check infrastructure state:\n", + " \n", + " terraform plan -out=tfplan\n", + "\n", + "If there are no modifications to the infrastructure required and everything looks correct, apply the plan:\n", + " \n", + " terraform apply tfplan\n", + "\n", + "#### Set up Earthdata credentials in AWS Parameter Store\n", + "\n", + "In this tutorial the Lambda function reads data files from the Earthdata S3 bucket directly. To avoid hard-coding Earthdata credentials or packaging a .netrc file in the Docker image that deploys the Lambda code, we use the AWS parameter store to set the Earthdata credentials. This means that the same Lambda code can run without modification in any user environment and will assume the correct EDL that is set in the AWS Parameter Store. \n", + "\n", + "#### Set up S3 bucket to hold granules and results\n", + "\n", + "The Lambda function writes the results of the calculation back to a NetCDF file. In this scenario, there will be one results file generated for each granule processed. The results files are saved to an S3 bucket, where they can persist or be downloaded for further analysis & plotting. You need to create the S3 bucket that the Lambda function will use to save the results files.\n", + "\n", + "#### Test Lambda function\n", + "\n", + "#### Connect to EC2 instance to run this notebook\n", + "\n", + "This notebook cannot be run on a local computer, as it heavily depends on direct in-cloud access. To run this notebook in AWS, connect to an EC2 instance running in the us-west-2 region, [following the instructions in this tutorial](https://podaac.github.io/tutorials/external/July_2022_Earthdata_Webinar.html). 
Once you have connected to the EC2 instance, you can clone this repository into that environment, install the required packages, and run this notebook. \n", + "\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 1: Log in to Earthdata" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We use the [earthaccess](https://nsidc.github.io/earthaccess/) Python library to handle Earthdata authentication for the initial query to find the granules of interest. " + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "import earthaccess\n", + "import json\n", + "import boto3\n", + "import s3fs\n", + "import xarray as xr\n", + "import matplotlib as mpl\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "EARTHDATA_USERNAME and EARTHDATA_PASSWORD are not set in the current environment, try setting them or use a different strategy (netrc, interactive)\n", + "You're now authenticated with NASA Earthdata Login\n", + "Using token with expiration date: 06/16/2023\n", + "Using .netrc file for EDL\n" + ] + } + ], + "source": [ + "auth = earthaccess.login()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Granules found: 365\n" + ] + } + ], + "source": [ + "granules = earthaccess.search_data(\n", + " short_name='MUR25-JPL-L4-GLOB-v04.2',\n", + " cloud_hosted=True,\n", + " temporal=(\"2022-01-01\", \"2023-01-01\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "granule_paths = [g.data_links(access='direct')[0] for g in granules]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + 
"name": "stdout", + "output_type": "stream", + "text": [ + "s3://podaac-ops-cumulus-protected/MUR25-JPL-L4-GLOB-v04.2/20220101090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc\n" + ] + } + ], + "source": [ + "for path in granule_paths:\n", + " print(path)\n", + " break" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 2: Invoke the Lambda function" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Set up a boto3 session to connect to your AWS instance and invoke the Lambda function" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "session = boto3.Session(profile_name='saml-pub')\n", + "\n", + "lambda_client = session.client('lambda', region_name='us-west-2')\n", + "\n", + "s3_results_bucket = \"podaac-sst\"\n", + "\n", + "for granule in granule_paths:\n", + " lambda_payload = {\"input_granule_s3path\": granule, \"output_granule_s3bucket\": s3_results_bucket, \"prefix\":\"podaac\"}\n", + "\n", + " lambda_client.invoke(\n", + " FunctionName=\"podaac-sst\",\n", + " InvocationType=\"Event\",\n", + " Payload=json.dumps(lambda_payload)\n", + " )" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Step 3: Plot results as timeseries" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Open the resulting global mean files in xarray:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "# set up the connection to the S3 bucket holding the results\n", + "s3_results = s3fs.S3FileSystem(\n", + " anon=False,\n", + " profile='saml-pub'\n", + ")\n", + "\n", + "s3_files = s3_results.glob(\"s3://\" + s3_results_bucket + \"/MUR25/*\")\n", + "\n", + "\n", + "# iterate through s3 files to create a fileset\n", + "fileset = [s3_results.open(file) for file in s3_files]\n", + "\n" + ] + }, + { + 
"cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "# open all files as an xarray dataset\n", + "data = xr.open_mfdataset(fileset, combine='by_coords', engine='scipy')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:       (time: 365)\n",
+       "Coordinates:\n",
+       "  * time          (time) datetime64[ns] 2022-01-01T09:00:00 ... 2023-01-01T09...\n",
+       "Data variables:\n",
+       "    analysed_sst  (time) float64 dask.array<chunksize=(1,), meta=np.ndarray>
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 365)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2022-01-01T09:00:00 ... 2023-01-01T09...\n", + "Data variables:\n", + " analysed_sst (time) float64 dask.array" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Plot the data using matplotlib:" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0.5, 0, 'Date')" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "mpl.rcParams.update({'font.size': 22})\n", + "\n", + "# set up the figure\n", + "fig = plt.Figure(figsize=(20,5))\n", + "\n", + "# plot the data\n", + "plt.plot(data.time, data.analysed_sst, linewidth='3')\n", + "plt.title('Global Mean Sea Surface Temperature' + '\\n' + '2022, MUR25')\n", + "\n", + "plt.ylabel('degrees C')\n", + "plt.xlabel('Date')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + }, + "vscode": { + "interpreter": { + "hash": "32a40736d00c188d2effbb1afc97ba7e733733270c902b4998c089acce9c970f" + } + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/aws_lambda_sst/requirements.txt b/notebooks/aws_lambda_sst/requirements.txt new file mode 100644 index 00000000..eddc7dc9 --- /dev/null +++ b/notebooks/aws_lambda_sst/requirements.txt @@ -0,0 +1,30 @@ +aiobotocore==2.4.2 +aiohttp==3.8.4 +aioitertools==0.11.0 +aiosignal==1.3.1 +async-timeout==4.0.2 +attrs==22.2.0 +botocore==1.27.59 +certifi==2022.12.7 +charset-normalizer==3.1.0 +frozenlist==1.3.3 +fsspec==2023.3.0 +h5netcdf==1.1.0 +h5py==3.8.0 +idna==3.4 +jmespath==1.0.1 +multidict==6.0.4 +numpy==1.24.2 +packaging==23.0 +pandas==1.5.3 +python-dateutil==2.8.2 +pytz==2022.7.1 +requests==2.28.2 +s3fs==2023.3.0 +scipy==1.10.1 +six==1.16.0 +typing_extensions==4.5.0 +urllib3==1.26.14 +wrapt==1.15.0 +xarray==2023.2.0 +yarl==1.8.2 diff --git a/notebooks/aws_lambda_sst/sst-global-mean-exploratory.ipynb 
b/notebooks/aws_lambda_sst/sst-global-mean-exploratory.ipynb new file mode 100644 index 00000000..74b08327 --- /dev/null +++ b/notebooks/aws_lambda_sst/sst-global-mean-exploratory.ipynb @@ -0,0 +1,2302 @@ +{ + "cells": [ + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Calculate MUR25 Sea Surface Temperature Global Mean\n", + "\n", + "This notebook demonstrates how we calculate the area-weighted global mean sea surface temperature (SST) from the MUR25 L4 dataset. \n", + "\n", + "You can run this on a local computer by downloading the data using the following command in your terminal:\n", + " \n", + " podaac-data-downloader -c MUR25-JPL-L4-GLOB-v04.2 -d ./data/MUR25-JPL-L4-GLOB-v04.2 --start-date 2022-12-01T00:00:00Z --end-date 2022-12-31T23:00:00Z" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load required libraries and prepare data" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import cartopy.crs as ccrs\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import xarray as xr\n", + "from datetime import date" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset>\n",
+       "Dimensions:           (time: 1, lat: 720, lon: 1440)\n",
+       "Coordinates:\n",
+       "  * time              (time) datetime64[ns] 2022-12-01T09:00:00\n",
+       "  * lat               (lat) float32 -89.88 -89.62 -89.38 ... 89.38 89.62 89.88\n",
+       "  * lon               (lon) float32 -179.9 -179.6 -179.4 ... 179.4 179.6 179.9\n",
+       "Data variables:\n",
+       "    analysed_sst      (time, lat, lon) float32 nan nan nan ... 271.4 271.4 271.4\n",
+       "    analysis_error    (time, lat, lon) float32 nan nan nan ... 0.34 0.34 0.34\n",
+       "    mask              (time, lat, lon) float32 2.0 2.0 2.0 2.0 ... 9.0 9.0 9.0\n",
+       "    sea_ice_fraction  (time, lat, lon) float32 nan nan nan ... 0.97 0.97 0.97\n",
+       "    sst_anomaly       (time, lat, lon) float32 nan nan nan nan ... 0.0 0.0 0.0\n",
+       "Attributes: (12/54)\n",
+       "    Conventions:                CF-1.7, ACDD-1.3\n",
+       "    title:                      Daily 0.25-degree MUR SST, Final product\n",
+       "    summary:                    A low-resolution version of the MUR SST analy...\n",
+       "    keywords:                   Oceans > Ocean Temperature > Sea Surface Temp...\n",
+       "    keywords_vocabulary:        NASA Global Change Master Directory (GCMD) Sc...\n",
+       "    standard_name_vocabulary:   NetCDF Climate and Forecast (CF) Metadata Con...\n",
+       "    ...                         ...\n",
+       "    publisher_name:             GHRSST Project Office\n",
+       "    publisher_url:              https://www.ghrsst.org\n",
+       "    publisher_email:            gpc@ghrsst.org\n",
+       "    file_quality_level:         3\n",
+       "    metadata_link:              http://podaac.jpl.nasa.gov/ws/metadata/datase...\n",
+       "    acknowledgment:             Please acknowledge the use of these data with...
" + ], + "text/plain": [ + "\n", + "Dimensions: (time: 1, lat: 720, lon: 1440)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2022-12-01T09:00:00\n", + " * lat (lat) float32 -89.88 -89.62 -89.38 ... 89.38 89.62 89.88\n", + " * lon (lon) float32 -179.9 -179.6 -179.4 ... 179.4 179.6 179.9\n", + "Data variables:\n", + " analysed_sst (time, lat, lon) float32 nan nan nan ... 271.4 271.4 271.4\n", + " analysis_error (time, lat, lon) float32 nan nan nan ... 0.34 0.34 0.34\n", + " mask (time, lat, lon) float32 2.0 2.0 2.0 2.0 ... 9.0 9.0 9.0\n", + " sea_ice_fraction (time, lat, lon) float32 nan nan nan ... 0.97 0.97 0.97\n", + " sst_anomaly (time, lat, lon) float32 nan nan nan nan ... 0.0 0.0 0.0\n", + "Attributes: (12/54)\n", + " Conventions: CF-1.7, ACDD-1.3\n", + " title: Daily 0.25-degree MUR SST, Final product\n", + " summary: A low-resolution version of the MUR SST analy...\n", + " keywords: Oceans > Ocean Temperature > Sea Surface Temp...\n", + " keywords_vocabulary: NASA Global Change Master Directory (GCMD) Sc...\n", + " standard_name_vocabulary: NetCDF Climate and Forecast (CF) Metadata Con...\n", + " ... ...\n", + " publisher_name: GHRSST Project Office\n", + " publisher_url: https://www.ghrsst.org\n", + " publisher_email: gpc@ghrsst.org\n", + " file_quality_level: 3\n", + " metadata_link: http://podaac.jpl.nasa.gov/ws/metadata/datase...\n", + " acknowledgment: Please acknowledge the use of these data with..." + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ds = xr.load_dataset('~/data/MUR25-JPL-L4-GLOB-v04.2/20221201090000-JPL-L4_GHRSST-SSTfnd-MUR25-GLOB-v02.0-fv04.2.nc')\n", + "ds\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'analysed_sst' (time: 1, lat: 720, lon: 1440)>\n",
+       "array([[[       nan,        nan,        nan, ...,        nan,\n",
+       "                nan,        nan],\n",
+       "        [       nan,        nan,        nan, ...,        nan,\n",
+       "                nan,        nan],\n",
+       "        [       nan,        nan,        nan, ...,        nan,\n",
+       "                nan,        nan],\n",
+       "        ...,\n",
+       "        [-1.7999878, -1.7999878, -1.7999878, ..., -1.7999878,\n",
+       "         -1.7999878, -1.7999878],\n",
+       "        [-1.7999878, -1.7999878, -1.7999878, ..., -1.7999878,\n",
+       "         -1.7999878, -1.7999878],\n",
+       "        [-1.7999878, -1.7999878, -1.7999878, ..., -1.7999878,\n",
+       "         -1.7999878, -1.7999878]]], dtype=float32)\n",
+       "Coordinates:\n",
+       "  * time     (time) datetime64[ns] 2022-12-01T09:00:00\n",
+       "  * lat      (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88\n",
+       "  * lon      (lon) float32 -179.9 -179.6 -179.4 -179.1 ... 179.4 179.6 179.9
" + ], + "text/plain": [ + "\n", + "array([[[ nan, nan, nan, ..., nan,\n", + " nan, nan],\n", + " [ nan, nan, nan, ..., nan,\n", + " nan, nan],\n", + " [ nan, nan, nan, ..., nan,\n", + " nan, nan],\n", + " ...,\n", + " [-1.7999878, -1.7999878, -1.7999878, ..., -1.7999878,\n", + " -1.7999878, -1.7999878],\n", + " [-1.7999878, -1.7999878, -1.7999878, ..., -1.7999878,\n", + " -1.7999878, -1.7999878],\n", + " [-1.7999878, -1.7999878, -1.7999878, ..., -1.7999878,\n", + " -1.7999878, -1.7999878]]], dtype=float32)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2022-12-01T09:00:00\n", + " * lat (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88\n", + " * lon (lon) float32 -179.9 -179.6 -179.4 -179.1 ... 179.4 179.6 179.9" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# select sst variable\n", + "sst = ds.analysed_sst\n", + "\n", + "# convert to degrees Celcius\n", + "sst = sst - 273.15\n", + "sst" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# plot the data\n", + "p = sst.plot(subplot_kws=dict(transform=ccrs.PlateCarree()))\n" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Calculate the area-weighted global mean" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'lat' (lat: 720)>\n",
+       "array([0.00218172, 0.00654498, 0.01090811, 0.01527103, 0.01963366,\n",
+       "       0.02399603, 0.02835783, 0.03271909, 0.03707973, 0.04143966,\n",
+       "       0.04579892, 0.05015719, 0.0545145 , 0.05887078, 0.06322594,\n",
+       "       0.06758001, 0.07193268, 0.07628398, 0.08063382, 0.08498225,\n",
+       "       0.08932894, 0.09367393, 0.09801713, 0.10235848, 0.10669798,\n",
+       "       0.11103535, 0.11537059, 0.11970364, 0.12403442, 0.12836295,\n",
+       "       0.13268891, 0.13701235, 0.14133318, 0.14565133, 0.1499668 ,\n",
+       "       0.15427932, 0.1585889 , 0.16289546, 0.16719891, 0.1714993 ,\n",
+       "       0.1757963 , 0.18008997, 0.1843802 , 0.18866692, 0.19295016,\n",
+       "       0.19722962, 0.20150532, 0.20577718, 0.21004525, 0.2143092 ,\n",
+       "       0.21856907, 0.22282477, 0.22707623, 0.2313235 , 0.23556623,\n",
+       "       0.23980448, 0.24403816, 0.24826722, 0.25249165, 0.25671116,\n",
+       "       0.26092577, 0.26513544, 0.26934004, 0.27353963, 0.2777339 ,\n",
+       "       0.28192288, 0.2861065 , 0.29028466, 0.2944574 , 0.29862443,\n",
+       "       0.30278578, 0.30694136, 0.3110911 , 0.31523505, 0.31937286,\n",
+       "       0.3235046 , 0.32763016, 0.33174962, 0.33586264, 0.33996928,\n",
+       "       0.34406942, 0.34816304, 0.35225013, 0.3563304 , 0.36040387,\n",
+       "       0.3644705 , 0.36853018, 0.37258297, 0.37662855, 0.38066694,\n",
+       "       0.38469812, 0.38872194, 0.3927385 , 0.39674744, 0.40074885,\n",
+       "       0.40474263, 0.4087287 , 0.4127071 , 0.4166775 , 0.42064002,\n",
+       "...\n",
+       "       0.40074885, 0.39674744, 0.3927385 , 0.38872194, 0.38469812,\n",
+       "       0.38066694, 0.37662855, 0.37258297, 0.36853018, 0.3644705 ,\n",
+       "       0.36040387, 0.3563304 , 0.35225013, 0.34816304, 0.34406942,\n",
+       "       0.33996928, 0.33586264, 0.33174962, 0.32763016, 0.3235046 ,\n",
+       "       0.31937286, 0.31523505, 0.3110911 , 0.30694136, 0.30278578,\n",
+       "       0.29862443, 0.2944574 , 0.29028466, 0.2861065 , 0.28192288,\n",
+       "       0.2777339 , 0.27353963, 0.26934004, 0.26513544, 0.26092577,\n",
+       "       0.25671116, 0.25249165, 0.24826722, 0.24403816, 0.23980448,\n",
+       "       0.23556623, 0.2313235 , 0.22707623, 0.22282477, 0.21856907,\n",
+       "       0.2143092 , 0.21004525, 0.20577718, 0.20150532, 0.19722962,\n",
+       "       0.19295016, 0.18866692, 0.1843802 , 0.18008997, 0.1757963 ,\n",
+       "       0.1714993 , 0.16719891, 0.16289546, 0.1585889 , 0.15427932,\n",
+       "       0.1499668 , 0.14565133, 0.14133318, 0.13701235, 0.13268891,\n",
+       "       0.12836295, 0.12403442, 0.11970364, 0.11537059, 0.11103535,\n",
+       "       0.10669798, 0.10235848, 0.09801713, 0.09367393, 0.08932894,\n",
+       "       0.08498225, 0.08063382, 0.07628398, 0.07193268, 0.06758001,\n",
+       "       0.06322594, 0.05887078, 0.0545145 , 0.05015719, 0.04579892,\n",
+       "       0.04143966, 0.03707973, 0.03271909, 0.02835783, 0.02399603,\n",
+       "       0.01963366, 0.01527103, 0.01090811, 0.00654498, 0.00218172],\n",
+       "      dtype=float32)\n",
+       "Coordinates:\n",
+       "  * lat      (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88\n",
+       "Attributes:\n",
+       "    long_name:              latitude\n",
+       "    standard_name:          latitude\n",
+       "    coverage_content_type:  coordinate\n",
+       "    axis:                   Y\n",
+       "    units:                  degrees_north\n",
+       "    valid_min:              -90.0\n",
+       "    valid_max:              90.0\n",
+       "    comment:                geolocations inherited from the input data withou...
" + ], + "text/plain": [ + "\n", + "array([0.00218172, 0.00654498, 0.01090811, 0.01527103, 0.01963366,\n", + " 0.02399603, 0.02835783, 0.03271909, 0.03707973, 0.04143966,\n", + " 0.04579892, 0.05015719, 0.0545145 , 0.05887078, 0.06322594,\n", + " 0.06758001, 0.07193268, 0.07628398, 0.08063382, 0.08498225,\n", + " 0.08932894, 0.09367393, 0.09801713, 0.10235848, 0.10669798,\n", + " 0.11103535, 0.11537059, 0.11970364, 0.12403442, 0.12836295,\n", + " 0.13268891, 0.13701235, 0.14133318, 0.14565133, 0.1499668 ,\n", + " 0.15427932, 0.1585889 , 0.16289546, 0.16719891, 0.1714993 ,\n", + " 0.1757963 , 0.18008997, 0.1843802 , 0.18866692, 0.19295016,\n", + " 0.19722962, 0.20150532, 0.20577718, 0.21004525, 0.2143092 ,\n", + " 0.21856907, 0.22282477, 0.22707623, 0.2313235 , 0.23556623,\n", + " 0.23980448, 0.24403816, 0.24826722, 0.25249165, 0.25671116,\n", + " 0.26092577, 0.26513544, 0.26934004, 0.27353963, 0.2777339 ,\n", + " 0.28192288, 0.2861065 , 0.29028466, 0.2944574 , 0.29862443,\n", + " 0.30278578, 0.30694136, 0.3110911 , 0.31523505, 0.31937286,\n", + " 0.3235046 , 0.32763016, 0.33174962, 0.33586264, 0.33996928,\n", + " 0.34406942, 0.34816304, 0.35225013, 0.3563304 , 0.36040387,\n", + " 0.3644705 , 0.36853018, 0.37258297, 0.37662855, 0.38066694,\n", + " 0.38469812, 0.38872194, 0.3927385 , 0.39674744, 0.40074885,\n", + " 0.40474263, 0.4087287 , 0.4127071 , 0.4166775 , 0.42064002,\n", + "...\n", + " 0.40074885, 0.39674744, 0.3927385 , 0.38872194, 0.38469812,\n", + " 0.38066694, 0.37662855, 0.37258297, 0.36853018, 0.3644705 ,\n", + " 0.36040387, 0.3563304 , 0.35225013, 0.34816304, 0.34406942,\n", + " 0.33996928, 0.33586264, 0.33174962, 0.32763016, 0.3235046 ,\n", + " 0.31937286, 0.31523505, 0.3110911 , 0.30694136, 0.30278578,\n", + " 0.29862443, 0.2944574 , 0.29028466, 0.2861065 , 0.28192288,\n", + " 0.2777339 , 0.27353963, 0.26934004, 0.26513544, 0.26092577,\n", + " 0.25671116, 0.25249165, 0.24826722, 0.24403816, 0.23980448,\n", + " 0.23556623, 0.2313235 , 0.22707623, 
0.22282477, 0.21856907,\n", + " 0.2143092 , 0.21004525, 0.20577718, 0.20150532, 0.19722962,\n", + " 0.19295016, 0.18866692, 0.1843802 , 0.18008997, 0.1757963 ,\n", + " 0.1714993 , 0.16719891, 0.16289546, 0.1585889 , 0.15427932,\n", + " 0.1499668 , 0.14565133, 0.14133318, 0.13701235, 0.13268891,\n", + " 0.12836295, 0.12403442, 0.11970364, 0.11537059, 0.11103535,\n", + " 0.10669798, 0.10235848, 0.09801713, 0.09367393, 0.08932894,\n", + " 0.08498225, 0.08063382, 0.07628398, 0.07193268, 0.06758001,\n", + " 0.06322594, 0.05887078, 0.0545145 , 0.05015719, 0.04579892,\n", + " 0.04143966, 0.03707973, 0.03271909, 0.02835783, 0.02399603,\n", + " 0.01963366, 0.01527103, 0.01090811, 0.00654498, 0.00218172],\n", + " dtype=float32)\n", + "Coordinates:\n", + " * lat (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88\n", + "Attributes:\n", + " long_name: latitude\n", + " standard_name: latitude\n", + " coverage_content_type: coordinate\n", + " axis: Y\n", + " units: degrees_north\n", + " valid_min: -90.0\n", + " valid_max: 90.0\n", + " comment: geolocations inherited from the input data withou..." + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# create the weights\n", + "\n", + "weights = np.cos(np.deg2rad(sst.lat))\n", + "weights\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "for lat in sst.lat:\n", + " l = lat.values\n", + " if (l>60) or (l<-60):\n", + " weights.loc[dict(lat=l)] = 0\n", + "\n", + "#weights" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'analysed_sst' ()>\n",
+       "array(20.52885965)
" + ], + "text/plain": [ + "\n", + "array(20.52885965)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# apply weights to data\n", + "sst_weighted = sst.weighted(weights)\n", + "\n", + "# calculate the global mean on the weighted data\n", + "sst_global_mean = sst_weighted.mean()\n", + "\n", + "# display the values\n", + "sst_global_mean" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.DataArray 'analysed_sst' (time: 1)>\n",
+       "array([20.52885965])\n",
+       "Coordinates:\n",
+       "  * time     (time) datetime64[ns] 2022-12-01T09:00:00\n",
+       "Attributes:\n",
+       "    description:   Area-weighted global mean sea surface temperature calculat...\n",
+       "    units:         celcius\n",
+       "    date_created:  Jun-09-2023
" + ], + "text/plain": [ + "\n", + "array([20.52885965])\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 2022-12-01T09:00:00\n", + "Attributes:\n", + " description: Area-weighted global mean sea surface temperature calculat...\n", + " units: celcius\n", + " date_created: Jun-09-2023" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "sst_out = sst_global_mean.expand_dims(time=ds.time)\n", + "sst_out = sst_out.assign_attrs({\n", + " \"description\": \"Area-weighted global mean sea surface temperature calculated using AWS Lambda\",\n", + " \"units\": \"celcius\",\n", + " \"date_created\": date.today().strftime(\"%b-%d-%Y\")\n", + "})\n", + "sst_out" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "scratch", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/notebooks/aws_lambda_sst/sst.py b/notebooks/aws_lambda_sst/sst.py new file mode 100644 index 00000000..727c062a --- /dev/null +++ b/notebooks/aws_lambda_sst/sst.py @@ -0,0 +1,176 @@ +# Imports +import requests +import base64 +from datetime import date +import s3fs +import boto3 +import botocore +import json +import xarray as xr +import numpy as np + +# Constants +S3_ENDPOINT_DICT = { + 'podaac':'https://archive.podaac.earthdata.nasa.gov/s3credentials' +} + +# Handle EDL login & S3 credentials +def get_creds(s3_endpoint, edl_username, edl_password): + """Request and return temporary S3 credentials. 
+ + Taken from: https://archive.podaac.earthdata.nasa.gov/s3credentialsREADME + """ + + login = requests.get( + s3_endpoint, allow_redirects=False + ) + login.raise_for_status() + + auth = f"{edl_username}:{edl_password}" + encoded_auth = base64.b64encode(auth.encode('ascii')) + + auth_redirect = requests.post( + login.headers['location'], + data = {"credentials": encoded_auth}, + headers= { "Origin": s3_endpoint }, + allow_redirects=False + ) + auth_redirect.raise_for_status() + final = requests.get(auth_redirect.headers['location'], allow_redirects=False) + results = requests.get(s3_endpoint, cookies={'accessToken': final.cookies['accessToken']}) + results.raise_for_status() + return json.loads(results.content) + +def get_temp_creds(prefix): + # retrieve EDL credentials from AWS Parameter Store + try: + ssm_client = boto3.client('ssm', region_name="us-west-2") + edl_username = ssm_client.get_parameter(Name=f"{prefix}-sst-edl-username", WithDecryption=True)["Parameter"]["Value"] + edl_password = ssm_client.get_parameter(Name=f"{prefix}-sst-edl-password", WithDecryption=True)["Parameter"]["Value"] + print("Retrieved Earthdata login credentials.") + except botocore.exceptions.ClientError as error: + raise error + + # use EDL creds to get AWS S3 Access Keys & Tokens + s3_creds = get_creds(S3_ENDPOINT_DICT[prefix], edl_username, edl_password) + print("Retrieved temporary S3 access credentials.") + + return s3_creds + + +# Science functions & lambda handler +def sst_global_mean(data_in): + """ + Calculate the area-weighted sea surface temperature (sst) global mean + + Parameters + ========== + data_in: xarray.Dataset() + the input dataset + + var_name: string + the variable to calculate the global mean on + + Return + ====== + data_out: ndarray, xarray + the global mean for the provided variable + """ + + # select the sst variable and select single time + data_var = data_in.analysed_sst.isel(time=0) + + # convert to degrees Celsius + data_var = data_var - 273.15 + + # 
create the weights + weights = np.cos(np.deg2rad(data_var.lat)) + for lat in data_var.lat: + l = lat.values + if (l>60) or (l<-60): + weights.loc[dict(lat=l)] = 0 + + # apply weights to data + data_weighted = data_var.weighted(weights) + + # calculate the global mean on the weighted data + global_mean = data_weighted.mean() + + sst_out = global_mean.expand_dims(time=data_in.time) + sst_out = sst_out.assign_attrs({ + "description": "Area-weighted global mean sea surface temperature calculated using AWS Lambda", + "units": "celcius", + "date_created": date.today().strftime("%b-%d-%Y") + }) + + return sst_out + + +def lambda_handler(event, context): + """Lambda event handler to orchestrate calculation of global mean.""" + + # -------------------- + # Unpack event payload + # -------------------- + + prefix = event["prefix"] + #key = event["s3_key"] # Granule name + + input_granule_path = event["input_granule_s3path"] + input_bucket, folder, input_key = input_granule_path.replace("s3://", "").split("/", 2) + + # get the name of the user's output S3 bucket + output_s3_bucket = event["output_granule_s3bucket"] + + # --------------------------------------------- + # Read data from Earthdata S3 buckets using EDL + # --------------------------------------------- + + # Get EDL credentials from AWS Parameter Store + temp_creds_req = get_temp_creds(prefix) + + # Set up S3 client for Earthdata buckets using EDL creds + s3_client_in = s3fs.S3FileSystem( + anon=False, + key=temp_creds_req['accessKeyId'], + secret=temp_creds_req['secretAccessKey'], + token=temp_creds_req['sessionToken'] + ) + + # open the granule as an s3 obj + s3_file_obj = s3_client_in.open(input_granule_path, mode='rb') + + # ----------------------------------- + # Do science calculations on the data + # ----------------------------------- + + # open data in xarray + ds = xr.open_dataset(s3_file_obj, engine='h5netcdf') + + # process the function + ds_results = sst_global_mean(ds) + + # 
-------------------------------------------------------------- + # Write results to the user's own S3 bucket for further analysis + # -------------------------------------------------------------- + + output_key = input_key[:-3] + '_mean.nc' + + # create the temp path for Lambda to write results to locally + tmp_file_path = '/tmp/' + output_key + + # write the results to a new netcdf file + try: + ds_results.to_netcdf(tmp_file_path, mode='w') + + except Exception as e: + print("Problem writing to tmp: " + e) + + # Set up S3 client for user output bucket. + s3_out = boto3.client('s3') + + s3_out.upload_file(tmp_file_path, output_s3_bucket, output_key) + + # Close dataset and S3 file objects + ds.close() + diff --git a/notebooks/aws_lambda_sst/terraform/.terraform.lock.hcl b/notebooks/aws_lambda_sst/terraform/.terraform.lock.hcl new file mode 100644 index 00000000..494c9d5c --- /dev/null +++ b/notebooks/aws_lambda_sst/terraform/.terraform.lock.hcl @@ -0,0 +1,26 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. 
+ +provider "registry.terraform.io/hashicorp/aws" { + version = "4.57.1" + constraints = "~> 4.0" + hashes = [ + "h1:lyiJFRB0nKUS/OkS8OSqxAYZuLWVBIPpN67VGoDyYak=", + "h1:rqJN5HwMnJtHIvIzublREIxUibBFYIKyeQcgOov4DUQ=", + "zh:44200c213ddb138df80d2a5ad86c2ebadbb5fd1d08cd7e4fc56ec6dca927659b", + "zh:469e6fe6a9e99e60cb168d32f05e2e9a83cf161f39160d075ff96f7674c510e1", + "zh:6110ba2c15a2268652ec9ea3797dd0216de84ece428055c49eaf9caa2be1ed62", + "zh:62ed7348acca44f64fc087e879e01cfa4e084c7600cc91e8bb7683f8065a9c79", + "zh:7a80e6fa9b35be178bb566093f7984dd6ffb7ad9d40b9dd5d5907f054f0c3e60", + "zh:8793043c8575a598c1a7cbefcb65ee1776b0061eba719098e552a3adc88f3090", + "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", + "zh:a777a0082114e273b7b3eb14095a3f6f6e703c1aff61ffb1f0846bb869e6dfc7", + "zh:b060c3b2973097f2087a98ac6aad7c9c89fe80f7cf3027019049feafc3f8305b", + "zh:e7035e74563f4486848ea1feb60852175353790bc374e0e97e241a88dc0908f7", + "zh:eaaa8e9eba09ada41e13116d53d4baece04fead8fcf3eab68cca3a67ed738e18", + "zh:ec52d8f95a84fad8fe1aae169c89d0c54d5401f75caae0869ad8182c6b6db65b", + "zh:f0e33174025b1b57ecfbdd09f2a59c2559ee94d7681e5ae09079e2822ec54ecf", + "zh:f69790a21380e5aab9303a252564737333e1e95b5d25567681630e49b17e3ec7", + "zh:ff6053942c40a99904bd407f3c082c1fa8f927ecce0374566eb7e8ee8145e582", + ] +} diff --git a/notebooks/aws_lambda_sst/terraform/main.tf b/notebooks/aws_lambda_sst/terraform/main.tf new file mode 100644 index 00000000..8d10c0a5 --- /dev/null +++ b/notebooks/aws_lambda_sst/terraform/main.tf @@ -0,0 +1,36 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 4.0" + } + } +} + +# Configure the AWS Provider +provider "aws" { + default_tags { + tags = local.default_tags + } + region = var.aws_region + profile = var.profile +} + +# Data sources +data "aws_caller_identity" "current" {} + +data "aws_ecr_repository" "podaac_sst_repo" { + name = var.ecr_repo +} + +data "aws_iam_role" "lambda_execution_role" { + name = 
var.lambda_role +} + +# Local variables +locals { + account_id = data.aws_caller_identity.current.account_id + default_tags = length(var.default_tags) == 0 ? { + application : var.app_name, + } : var.default_tags +} \ No newline at end of file diff --git a/notebooks/aws_lambda_sst/terraform/sst-lambda.tf b/notebooks/aws_lambda_sst/terraform/sst-lambda.tf new file mode 100644 index 00000000..1d2fb018 --- /dev/null +++ b/notebooks/aws_lambda_sst/terraform/sst-lambda.tf @@ -0,0 +1,54 @@ +# AWS Lambda function +resource "aws_lambda_function" "aws_lambda_error_handler" { + image_uri = "${data.aws_ecr_repository.podaac_sst_repo.repository_url}:latest" + function_name = "${var.prefix}-sst" + role = data.aws_iam_role.lambda_execution_role.arn + package_type = "Image" + memory_size = 6144 + timeout = 900 +} + +# SSM Parameter Store EDL Credentials +resource "aws_ssm_parameter" "aws_ssm_parameter_edl_username" { + name = "${var.prefix}-sst-edl-username" + description = "Earthdata Login username" + type = "SecureString" + value = var.edl_username +} + +resource "aws_ssm_parameter" "aws_ssm_parameter_edl_password" { + name = "${var.prefix}-sst-edl-password" + description = "Earthdata Login password" + type = "SecureString" + value = var.edl_password +} + +# S3 Bucket to hold results +resource "aws_s3_bucket" "aws_s3_bucket_sst" { + bucket = "${var.prefix}-sst" + tags = { Name = "${var.prefix}-sst" } +} + +resource "aws_s3_bucket_public_access_block" "aws_s3_bucket_sst_public_block" { + bucket = aws_s3_bucket.aws_s3_bucket_sst.id + block_public_acls = true + block_public_policy = true + ignore_public_acls = true + restrict_public_buckets = true +} + +resource "aws_s3_bucket_ownership_controls" "aws_s3_bucket_sst_ownership" { + bucket = aws_s3_bucket.aws_s3_bucket_sst.id + rule { + object_ownership = "BucketOwnerEnforced" + } +} + +resource "aws_s3_bucket_server_side_encryption_configuration" "aws_s3_bucket_sst_encryption" { + bucket = aws_s3_bucket.aws_s3_bucket_sst.bucket + 
rule { + apply_server_side_encryption_by_default { + sse_algorithm = "AES256" + } + } +} \ No newline at end of file diff --git a/notebooks/aws_lambda_sst/terraform/terraform.tfvars b/notebooks/aws_lambda_sst/terraform/terraform.tfvars new file mode 100644 index 00000000..b3c7fafc --- /dev/null +++ b/notebooks/aws_lambda_sst/terraform/terraform.tfvars @@ -0,0 +1,6 @@ +ecr_repo = "" +edl_password = "" +edl_username = "" +lambda_role = "" +prefix = "" +profile = "" diff --git a/notebooks/aws_lambda_sst/terraform/variables.tf b/notebooks/aws_lambda_sst/terraform/variables.tf new file mode 100644 index 00000000..73dbfe0b --- /dev/null +++ b/notebooks/aws_lambda_sst/terraform/variables.tf @@ -0,0 +1,46 @@ +variable "app_name" { + type = string + description = "Application name" + default = "SST" +} + +variable "aws_region" { + type = string + description = "AWS region to deploy to" + default = "us-west-2" +} + +variable "default_tags" { + type = map(string) + default = {} +} + +variable "ecr_repo" { + type = string + description = "sst-lambda container image repository name" +} + +variable "edl_password" { + type = string + description = "Earthdata Login password" +} + +variable "edl_username" { + type = string + description = "Earthdata Login username" +} + +variable "lambda_role" { + type = string + description = "Name of AWS Lambda IAM role" +} + +variable "prefix" { + type = string + description = "Prefix to add to all AWS resources as a unique identifier" +} + +variable "profile" { + type = string + description = "Named profile to build infrastructure with" +} \ No newline at end of file