From f4f8c6fbce2105fcca6806fb9c2e05e45977f2f1 Mon Sep 17 00:00:00 2001 From: Evgeny Gusarov Date: Wed, 3 Jan 2024 00:58:10 +0300 Subject: [PATCH] Add filebase client --- poetry.lock | 152 ++++++++++++++++++++++++++++++++++++++++++++++- pyproject.toml | 3 + sw_utils/ipfs.py | 84 ++++++++++++++++++++++++++ 3 files changed, 238 insertions(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 6372d07..b82922e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -203,6 +203,24 @@ files = [ [package.extras] tests = ["PyHamcrest (>=2.0.2)", "mypy", "pytest (>=4.6)", "pytest-benchmark", "pytest-cov", "pytest-flake8"] +[[package]] +name = "bases" +version = "0.3.0" +description = "Python library for general Base-N encodings." +optional = false +python-versions = ">=3.7" +files = [ + {file = "bases-0.3.0-py3-none-any.whl", hash = "sha256:a2fef3366f3e522ff473d2e95c21523fe8e44251038d5c6150c01481585ebf5b"}, + {file = "bases-0.3.0.tar.gz", hash = "sha256:70f04a4a45d63245787f9e89095ca11042685b6b64b542ad916575ba3ccd1570"}, +] + +[package.dependencies] +typing-extensions = ">=4.6.0" +typing-validation = ">=1.1.0" + +[package.extras] +dev = ["base58", "mypy", "pylint", "pytest", "pytest-cov"] + [[package]] name = "bitarray" version = "2.8.2" @@ -376,6 +394,44 @@ d = ["aiohttp (>=3.7.4)"] jupyter = ["ipython (>=7.8.0)", "tokenize-rt (>=3.2.0)"] uvloop = ["uvloop (>=0.15.2)"] +[[package]] +name = "boto3" +version = "1.34.11" +description = "The AWS SDK for Python" +optional = false +python-versions = ">= 3.8" +files = [ + {file = "boto3-1.34.11-py3-none-any.whl", hash = "sha256:1af021e0c6e3040e8de66d403e963566476235bb70f9a8e3f6784813ac2d8026"}, + {file = "boto3-1.34.11.tar.gz", hash = "sha256:31c130a40ec0631059b77d7e87f67ad03ff1685a5b37638ac0c4687026a3259d"}, +] + +[package.dependencies] +botocore = ">=1.34.11,<1.35.0" +jmespath = ">=0.7.1,<2.0.0" +s3transfer = ">=0.10.0,<0.11.0" + +[package.extras] +crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] + +[[package]] +name = "botocore" 
+version = "1.34.11" +description = "Low-level, data-driven core of boto 3." +optional = false +python-versions = ">= 3.8" +files = [ + {file = "botocore-1.34.11-py3-none-any.whl", hash = "sha256:1ff1398b6ea670e1c01ac67a33af3da854f8e700d3528289c04f319c330d8250"}, + {file = "botocore-1.34.11.tar.gz", hash = "sha256:51905c3d623c60df5dc5794387de7caf886d350180a01a3dfa762e903edb45a9"}, +] + +[package.dependencies] +jmespath = ">=0.7.1,<2.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = {version = ">=1.25.4,<2.1", markers = "python_version >= \"3.10\""} + +[package.extras] +crt = ["awscrt (==0.19.19)"] + [[package]] name = "cached-property" version = "1.5.2" @@ -1107,6 +1163,17 @@ files = [ {file = "iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, ] +[[package]] +name = "ipfs-cid" +version = "1.0.0" +description = "A library for building IPFS CID v1 compatible content identifiers using fixed encoding parameters." +optional = false +python-versions = ">=3.8,<4.0" +files = [ + {file = "ipfs_cid-1.0.0-py3-none-any.whl", hash = "sha256:341985f50f893e0e49cf7da0fc0c4ba1afbe57fdea8a215724fd708de4ef96cb"}, + {file = "ipfs_cid-1.0.0.tar.gz", hash = "sha256:a752c87cde68840e27dcb02f4e7cf940297fae43872bb5e6ef3bf73ad31af73d"}, +] + [[package]] name = "ipfshttpclient" version = "0.8.0a2" @@ -1139,6 +1206,17 @@ pipfile-deprecated-finder = ["pip-shims (>=0.5.2)", "pipreqs", "requirementslib" plugins = ["setuptools"] requirements-deprecated-finder = ["pip-api", "pipreqs"] +[[package]] +name = "jmespath" +version = "1.0.1" +description = "JSON Matching Expressions" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, + {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, +] + [[package]] name = "jsonschema" version = "4.19.2" @@ -1434,6 
+1512,44 @@ files = [ {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"}, ] +[[package]] +name = "multiformats" +version = "0.3.1.post4" +description = "Python implementation of multiformats protocols." +optional = false +python-versions = ">=3.7" +files = [ + {file = "multiformats-0.3.1.post4-py3-none-any.whl", hash = "sha256:5b1d61bd8275c9e817bdbee38dbd501b26629011962ee3c86c46e7ccd0b14129"}, + {file = "multiformats-0.3.1.post4.tar.gz", hash = "sha256:d00074fdbc7d603c2084b4c38fa17bbc28173cf2750f51f46fbbc5c4d5605fbb"}, +] + +[package.dependencies] +bases = ">=0.3.0" +multiformats-config = ">=0.3.0" +typing-extensions = ">=4.6.0" +typing-validation = ">=1.1.0" + +[package.extras] +dev = ["blake3", "mmh3", "mypy", "pycryptodomex", "pylint", "pyskein", "pytest", "pytest-cov", "rich"] +full = ["blake3", "mmh3", "pycryptodomex", "pyskein", "rich"] + +[[package]] +name = "multiformats-config" +version = "0.3.1" +description = "Pre-loading configuration module for the 'multiformats' package." 
+optional = false +python-versions = ">=3.7" +files = [ + {file = "multiformats-config-0.3.1.tar.gz", hash = "sha256:7eaa80ef5d9c5ee9b86612d21f93a087c4a655cbcb68960457e61adbc62b47a7"}, + {file = "multiformats_config-0.3.1-py3-none-any.whl", hash = "sha256:dec4c9d42ed0d9305889b67440f72e8e8d74b82b80abd7219667764b5b0a8e1d"}, +] + +[package.dependencies] +multiformats = "*" + +[package.extras] +dev = ["mypy", "pylint", "pytest", "pytest-cov"] + [[package]] name = "mypy" version = "1.6.1" @@ -2210,6 +2326,23 @@ files = [ {file = "rpds_py-0.10.6.tar.gz", hash = "sha256:4ce5a708d65a8dbf3748d2474b580d606b1b9f91b5c6ab2a316e0b0cf7a4ba50"}, ] +[[package]] +name = "s3transfer" +version = "0.10.0" +description = "An Amazon S3 Transfer Manager" +optional = false +python-versions = ">= 3.8" +files = [ + {file = "s3transfer-0.10.0-py3-none-any.whl", hash = "sha256:3cdb40f5cfa6966e812209d0994f2a4709b561c88e90cf00c2696d2df4e56b2e"}, + {file = "s3transfer-0.10.0.tar.gz", hash = "sha256:d0c8bbf672d5eebbe4e57945e23b972d963f07d82f661cabf678a5c88831595b"}, +] + +[package.dependencies] +botocore = ">=1.33.2,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] + [[package]] name = "setuptools" version = "68.2.2" @@ -2334,6 +2467,23 @@ files = [ {file = "typing_extensions-4.8.0.tar.gz", hash = "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef"}, ] +[[package]] +name = "typing-validation" +version = "1.1.0" +description = "A simple library for runtime type-checking." 
class FilebaseS3Client(BaseUploadClient):
    """
    Uploads to 3x replicated IPFS storage using S3-compatible api.

    Objects are written to `bucket` through the Filebase S3 endpoint with
    boto3. boto3 is a synchronous library, so every network call is pushed
    to a worker thread to keep the asyncio event loop responsive.
    """

    upload_endpoint = 'https://s3.filebase.com'
    unpin_endpoint = ''

    def __init__(
        self,
        aws_access_key_id: str,
        aws_secret_access_key: str,
        bucket: str,
        timeout: int = IPFS_DEFAULT_TIMEOUT,
    ):
        """
        :param aws_access_key_id: access key passed to the boto3 S3 client
        :param aws_secret_access_key: secret key passed to the boto3 S3 client
        :param bucket: bucket that receives (and loses, on `remove`) objects
        :param timeout: used for both the connect and the read timeout
        """
        self.aws_access_key_id = aws_access_key_id
        self.aws_secret_access_key = aws_secret_access_key
        self.bucket = bucket
        self.read_timeout = timeout
        self.connect_timeout = timeout

    async def upload_bytes(self, data: bytes) -> str:
        """
        Store `data` in the bucket and return the CID reported by Filebase,
        converted to CID v1.

        :raises ValueError: if `data` is empty
        """
        if not data:
            raise ValueError('Empty data provided')
        s3 = self.get_s3_client()

        # S3 api requires unique key for each object
        ipfs_hash = cid_sha256_hash(data)
        logger.info('ipfs_hash %s', ipfs_hash)

        # boto3 is blocking: run the request in a worker thread
        # instead of stalling the event loop
        res = await self._call_in_thread(
            s3.put_object, Body=data, Bucket=self.bucket, Key=ipfs_hash
        )

        cid_v0 = res['ResponseMetadata']['HTTPHeaders']['x-amz-meta-cid']
        cid_v1 = _cid_v0_to_v1(cid_v0)

        # `cid_v1` must be the same as `ipfs_hash`
        # but there may be issues on Filebase side
        # So use the cid returned from Filebase
        return cid_v1

    async def upload_json(self, data: dict | list) -> str:
        """
        Serialize `data` with `_dump_json` and upload the resulting bytes.

        :raises ValueError: if `data` is empty
        """
        if not data:
            raise ValueError('Empty data provided')
        return await self.upload_bytes(_dump_json(data))

    async def remove(self, ipfs_hash: str) -> None:
        """
        Delete the object stored under the key `ipfs_hash`.

        NOTE(review): `upload_bytes` stores the object under the locally
        computed hash but returns the CID reported by Filebase; confirm that
        callers pass the value that was actually used as the object key.

        :raises ValueError: if `ipfs_hash` is empty
        """
        if not ipfs_hash:
            raise ValueError('Empty IPFS hash provided')

        s3 = self.get_s3_client()

        # boto3 is blocking: run the request in a worker thread
        await self._call_in_thread(s3.delete_object, Bucket=self.bucket, Key=ipfs_hash)

        return None

    def get_s3_client(self) -> Any:
        """
        Build a new boto3 S3 client for the Filebase endpoint.

        botocore retries are switched off (`max_attempts` is 0) and the
        configured timeout is applied to both connect and read.
        """
        config = botocore.config.Config(
            read_timeout=self.read_timeout,
            connect_timeout=self.connect_timeout,
            retries={'max_attempts': 0},
        )
        return boto3.client(
            's3',
            config=config,
            endpoint_url=self.upload_endpoint,
            aws_access_key_id=self.aws_access_key_id,
            aws_secret_access_key=self.aws_secret_access_key,
        )

    @staticmethod
    async def _call_in_thread(func: Any, **kwargs: Any) -> Any:
        """Run the blocking callable `func(**kwargs)` in a worker thread."""
        # Imported locally so the class does not depend on the module's import block
        import asyncio

        return await asyncio.to_thread(func, **kwargs)
def _dump_json(data: Any) -> bytes:
    """Encode `data` to bytes with the ipfshttpclient `Json` encoder."""
    encoder = Json()
    return encoder.encode(data)


def _cid_v0_to_v1(cid: str) -> str:
    """
    Re-encode a version 0 CID string as a base32 version 1 CID string.

    :raises ValueError: if the decoded CID is not version 0
    """
    decoded = CID.decode(cid)
    if decoded.version == 0:
        upgraded = decoded.set(base='base32', version=1)
        return upgraded.encode()
    raise ValueError('cid version is not v0')