diff --git a/examples/Getting started with the Amazon S3 Connector for PyTorch.ipynb b/examples/Getting started with the Amazon S3 Connector for PyTorch.ipynb index e710bffa..4f65760f 100644 --- a/examples/Getting started with the Amazon S3 Connector for PyTorch.ipynb +++ b/examples/Getting started with the Amazon S3 Connector for PyTorch.ipynb @@ -98,7 +98,7 @@ "Requirement already satisfied: urllib3>=1.25 in /Volumes/workplace/s3-connector-for-pytorch/.venv-3.10/lib/python3.10/site-packages (from torchdata) (2.0.7)\n", "Requirement already satisfied: requests in /Volumes/workplace/s3-connector-for-pytorch/.venv-3.10/lib/python3.10/site-packages (from torchdata) (2.31.0)\n", "Requirement already satisfied: numpy in /Volumes/workplace/s3-connector-for-pytorch/.venv-3.10/lib/python3.10/site-packages (from torchvision) (1.26.4)\n", - "Requirement already satisfied: botocore<1.35.0,>=1.34.72 in /Volumes/workplace/s3-connector-for-pytorch/.venv-3.10/lib/python3.10/site-packages (from boto3) (1.34.90)\n", + "Requirement already satisfied: botocore<1.35.0,>=1.34.72 in /Volumes/workplace/s3-connector-for-pytorch/.venv-3.10/lib/python3.10/site-packages (from boto3) (1.34.95)\n", "Requirement already satisfied: jmespath<2.0.0,>=0.7.1 in /Volumes/workplace/s3-connector-for-pytorch/.venv-3.10/lib/python3.10/site-packages (from boto3) (1.0.1)\n", "Requirement already satisfied: s3transfer<0.11.0,>=0.10.0 in /Volumes/workplace/s3-connector-for-pytorch/.venv-3.10/lib/python3.10/site-packages (from boto3) (0.10.1)\n", "Requirement already satisfied: braceexpand in /Volumes/workplace/s3-connector-for-pytorch/.venv-3.10/lib/python3.10/site-packages (from webdataset) (0.1.7)\n", @@ -120,7 +120,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "id": "c9df48db-6f2f-4eb0-8db8-761b396bd1d2", "metadata": {}, "outputs": [], @@ -164,7 +164,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "id": "acaf152f-34c9-44b2-a519-bdcb4968cbf9", "metadata": {}, "outputs": [], @@ -185,7 +185,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "id": "a98d6ee1-a827-4b98-a916-e4dea3460d40", "metadata": {}, "outputs": [], @@ -205,7 +205,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "id": "980b2877-d9f5-47c9-89bf-a9ac6a74b7a6", "metadata": {}, "outputs": [], @@ -223,7 +223,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "id": "b7cc5dd4-f87a-493d-8d91-d3cda702ed80", "metadata": {}, "outputs": [ @@ -233,7 +233,7 @@ "271181" ] }, - "execution_count": 5, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -253,7 +253,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "id": "2ae7cdaa-f014-408b-9cb5-2aa7aa2f2305", "metadata": {}, "outputs": [ @@ -265,7 +265,7 @@ "" ] }, - "execution_count": 6, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -284,7 +284,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "id": "ec6f0913-f06d-4fd1-801d-367378a472c1", "metadata": {}, "outputs": [ @@ -294,7 +294,7 @@ "'geonet/images/DISC/DISC.01/2022.001/2022.001.0700.00.DISC.01.jpg'" ] }, - "execution_count": 7, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -313,7 +313,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "id": "7b775f16-d134-478d-ac17-0f67f378c05b", "metadata": {}, "outputs": [ @@ -444,7 +444,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "id": "7e5a91fc-12e7-4db7-a27c-e01f960cb638", "metadata": {}, "outputs": [], @@ -453,12 +453,17 @@ " img = Image.open(object)\n", " return (object.key, torchvision.transforms.functional.pil_to_tensor(img))\n", "\n", - "dataset = s3torchconnector.S3MapDataset.from_prefix(IMAGES_URI, region=REGION, transform=load_image, s3client_config=config)" + "dataset = s3torchconnector.S3MapDataset.from_prefix(\n", + " IMAGES_URI, \n", + " region=REGION, \n", + " transform=load_image, \n", + " s3client_config=config\n", + ")" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "id": "510b01a7-2b90-406c-b8c9-bdd61d888a05", "metadata": {}, "outputs": [ @@ -468,7 +473,7 @@ "torch.Size([3, 1536, 2048])" ] }, - "execution_count": 10, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -488,7 +493,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 13, "id": "78786cf5-16db-480d-8559-808a00201cf0", "metadata": {}, "outputs": [], @@ -506,7 +511,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 14, "id": "3eadf485-93ce-44d0-be04-398ec994092a", "metadata": {}, "outputs": [ @@ -539,7 +544,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 15, "id": "828bdfe2-29f9-4b4a-aa5d-30fa2f6970f4", "metadata": {}, "outputs": [ @@ -547,9 +552,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "('geonet/images/DISC/DISC.01/2022.007/2022.007.1730.00.DISC.01.jpg', 'geonet/images/DISC/DISC.01/2022.007/2022.007.0400.00.DISC.01.jpg', 'geonet/images/DISC/DISC.01/2022.002/2022.002.0440.00.DISC.01.jpg', 'geonet/images/DISC/DISC.01/2022.003/2022.003.0300.00.DISC.01.jpg')\n", - "('geonet/images/DISC/DISC.01/2022.007/2022.007.2200.00.DISC.01.jpg', 'geonet/images/DISC/DISC.01/2022.009/2022.009.1120.00.DISC.01.jpg', 'geonet/images/DISC/DISC.01/2022.004/2022.004.1530.00.DISC.01.jpg', 'geonet/images/DISC/DISC.01/2022.004/2022.004.1950.00.DISC.01.jpg')\n", - "('geonet/images/DISC/DISC.01/2022.005/2022.005.2130.00.DISC.01.jpg', 'geonet/images/DISC/DISC.01/2022.005/2022.005.1710.00.DISC.01.jpg', 'geonet/images/DISC/DISC.01/2022.002/2022.002.0840.00.DISC.01.jpg', 'geonet/images/DISC/DISC.01/2022.006/2022.006.2200.00.DISC.01.jpg')\n" + "('geonet/images/DISC/DISC.01/2022.006/2022.006.0540.00.DISC.01.jpg', 'geonet/images/DISC/DISC.01/2022.009/2022.009.0910.00.DISC.01.jpg', 'geonet/images/DISC/DISC.01/2022.006/2022.006.1620.00.DISC.01.jpg', 'geonet/images/DISC/DISC.01/2022.003/2022.003.0610.00.DISC.01.jpg')\n", + "('geonet/images/DISC/DISC.01/2022.004/2022.004.0500.00.DISC.01.jpg', 'geonet/images/DISC/DISC.01/2022.007/2022.007.1020.00.DISC.01.jpg', 'geonet/images/DISC/DISC.01/2022.008/2022.008.1040.00.DISC.01.jpg', 'geonet/images/DISC/DISC.01/2022.001/2022.001.2010.00.DISC.01.jpg')\n", + "('geonet/images/DISC/DISC.01/2022.004/2022.004.2020.00.DISC.01.jpg', 'geonet/images/DISC/DISC.01/2022.009/2022.009.1740.00.DISC.01.jpg', 'geonet/images/DISC/DISC.01/2022.005/2022.005.1410.00.DISC.01.jpg', 'geonet/images/DISC/DISC.01/2022.008/2022.008.1300.00.DISC.01.jpg')\n" ] } ], @@ -577,7 +582,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 16, "id": "4e0e4346-2cf2-49cf-ba87-726d3c5447fa", "metadata": {}, "outputs": [], @@ -609,7 +614,12 @@ "outputs": [], "source": [ "# Running this cell in a notebook won't work due to quirks between Jupyter and `multiprocessing`\n", - "dataset = s3torchconnector.S3IterableDataset.from_prefix(IMAGES_URI, region=REGION, transform=load_image, s3client_config=config)\n", + "dataset = s3torchconnector.S3IterableDataset.from_prefix(\n", + " IMAGES_URI, \n", + " region=REGION, \n", + " transform=load_image, \n", + " s3client_config=config\n", + ")\n", "dataset = torchdata.datapipes.iter.IterableWrapper(dataset)\n", "dataset = dataset.sharding_filter()\n", "\n", @@ -638,7 +648,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 17, "id": "648443e0-19cd-4665-a999-5b6b1244544e", "metadata": {}, "outputs": [ @@ -747,7 +757,12 @@ "def shard_to_dict(object):\n", " return {\"url\": object.key, \"stream\": object}\n", "\n", - "s3_dataset = s3torchconnector.S3IterableDataset.from_prefix(SHARDS_URI, region=REGION, transform=shard_to_dict, s3client_config=config)\n", + "s3_dataset = s3torchconnector.S3IterableDataset.from_prefix(\n", + " SHARDS_URI, \n", + " region=REGION, \n", + " transform=shard_to_dict, \n", + " s3client_config=config\n", + ")\n", "tar_dataset = webdataset.tariterators.tar_file_expander(s3_dataset)\n", "dataset = torchdata.datapipes.iter.IterableWrapper(tar_dataset, deepcopy=False)\n", "\n", @@ -792,7 +807,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 18, "id": "2ebc7406-4180-4b7d-ab47-41b7e9d22741", "metadata": {}, "outputs": [], @@ -810,7 +825,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 19, "id": "e7d5aac7-4f82-4d03-956d-a9266542ae24", "metadata": {}, "outputs": [], @@ -840,7 +855,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 21, "id": "b97c9300-5306-4f04-97b8-9a738056e678", "metadata": {}, "outputs": [ @@ -850,7 +865,7 @@ "" ] }, - "execution_count": 18, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -891,7 +906,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 22, "id": "8734a5ec06b2957f", "metadata": { "collapsed": false, @@ -943,9 +958,9 @@ "Using cached botocore-1.34.51-py3-none-any.whl (12.0 MB)\n", "Installing collected packages: botocore\n", " Attempting uninstall: botocore\n", - " Found existing installation: botocore 1.34.90\n", - " Uninstalling botocore-1.34.90:\n", - " Successfully uninstalled botocore-1.34.90\n", + " Found existing installation: botocore 1.34.95\n", + " Uninstalling botocore-1.34.95:\n", + " Successfully uninstalled botocore-1.34.95\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "boto3 1.34.72 requires botocore<1.35.0,>=1.34.72, but you have botocore 1.34.51 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed botocore-1.34.51\n", @@ -1022,7 +1037,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 23, "id": "a3650f7b63bdca13", "metadata": { "collapsed": false, @@ -1054,7 +1069,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 24, "id": "6910153d5f25c470", "metadata": { "collapsed": false, @@ -1076,7 +1091,7 @@ "'http://127.0.0.1:9911'" ] }, - "execution_count": 21, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -1088,7 +1103,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 25, "id": "f3e3e354f4b5b72e", "metadata": { "collapsed": false, @@ -1131,7 +1146,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 26, "id": "14d8ce43b8209503", "metadata": { "collapsed": false, @@ -1153,7 +1168,7 @@ "'dev'" ] }, - "execution_count": 23, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -1165,7 +1180,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 27, "id": "3d025f468de6bbcb", "metadata": { "collapsed": false, @@ -1208,7 +1223,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 28, "id": "b95615ef66e97e6f", "metadata": { "collapsed": false,