From e1d8d3b3bd8ed04854b1a0cc682a9f15f8d6aa0b Mon Sep 17 00:00:00 2001
From: Reuven Gonzales
Date: Tue, 29 Oct 2024 16:25:24 -0700
Subject: [PATCH] Fix metrics rendering and adds data tests (#2426)

* Starting a refactor of metrics_tools
* allow creating a basic python env
* Add intermediate macro evaluator
* remove prints
* Adds a "joiner"
* In progress fixes
* Small refactor
* upgrade sqlmesh
* 3rd order windows working
* Adds testing
* Removes dead code
* Fixes some bugs in the models
* Update poetry
* small fix
---
 poetry.lock                                        | 644 ++++++++++------
 pyproject.toml                                     |   3 +-
 .../metrics_mesh/models/metrics_factories.py       |  64 +-
 .../oso_metrics/active_addresses.sql               |   2 +-
 .../metrics_mesh/oso_metrics/active_days.sql       |   2 +-
 .../oso_metrics/change_in_developers.sql           |  31 +-
 .../metrics_mesh/oso_metrics/commits.sql           |   2 +-
 .../contributor_activity_classification.sql        |  32 +-
 .../developer_activity_classification.sql          |  31 +-
 warehouse/metrics_mesh/oso_metrics/forks.sql       |   2 +-
 .../metrics_mesh/oso_metrics/gas_fees.sql          |   2 +-
 .../oso_metrics/issues_closed.sql                  |   2 +-
 .../oso_metrics/issues_opened.sql                  |   2 +-
 .../metrics_mesh/oso_metrics/prs_merged.sql        |   2 +-
 .../metrics_mesh/oso_metrics/prs_opened.sql        |   2 +-
 warehouse/metrics_mesh/oso_metrics/stars.sql       |   2 +-
 .../metrics_mesh/oso_metrics/transactions.sql      |   2 +-
 warehouse/metrics_tools/README.md                  |  19 +-
 warehouse/metrics_tools/compute/worker.py          |  71 ++
 warehouse/metrics_tools/compute/wrapper.py         |  13 +
 .../{lib/factories => }/definition.py              | 106 +--
 .../{lib/factories => factory}/__init__.py         |   2 +-
 warehouse/metrics_tools/factory/factory.py         | 696 ++++++++++++++++++
 .../factory/fixtures/metrics/active_days.sql       |  15 +
 .../fixtures/metrics/change_in_developers.sql      |  64 ++
 .../developer_activity_classification.sql          |  70 ++
 .../factory/fixtures/metrics/visits.sql            |  14 +
 warehouse/metrics_tools/factory/gen_data.py        | 180 +++++
 .../{lib/factories => factory}/macros.py           | 150 +++-
 .../metrics_tools/factory/test_factory.py          | 269 +++++++
 warehouse/metrics_tools/intermediate.py            | 133 ++++
 warehouse/metrics_tools/joiner/__init__.py         | 125 ++++
 .../fixtures/basic/expected_artifact.sql           |   4 +
 .../fixtures/basic/expected_project.sql            |   5 +
 .../joiner/fixtures/basic/input.sql                |   4 +
 warehouse/metrics_tools/joiner/test_joiner.py      |  24 +
 warehouse/metrics_tools/lib/__init__.py            |   0
 .../metrics_tools/lib/factories/factory.py         | 293 --------
 .../metrics_tools/{lib => }/local/__init__.py      |   0
 .../metrics_tools/{lib => }/local/utils.py         |   0
 warehouse/metrics_tools/models.py                  | 130 +++-
 warehouse/metrics_tools/runner.py                  | 188 +++++
 warehouse/metrics_tools/test_intermediate.py       | 114 +++
 .../metrics_tools/transformer/__init__.py          |  13 +
 warehouse/metrics_tools/transformer/base.py        |   9 +
 .../metrics_tools/transformer/intermediate.py      |  35 +
 .../metrics_tools/transformer/qualify.py           |  13 +
 .../metrics_tools/transformer/transformer.py       |  32 +
 warehouse/metrics_tools/utils/__init__.py          |   4 +
 warehouse/metrics_tools/utils/dataframes.py        |   6 +
 warehouse/metrics_tools/utils/glot.py              |  10 +
 warehouse/metrics_tools/utils/testing.py           |  30 +
 warehouse/oso_dagster/cbt/utils/compare.py         |   6 +-
 warehouse/oso_dagster/cbt/utils/test_utils.py      |   4 +-
 warehouse/oso_lets_go/cli.py                       |   2 +-
 55 files changed, 2945 insertions(+), 735 deletions(-)
 create mode 100644 warehouse/metrics_tools/compute/worker.py
 create mode 100644 warehouse/metrics_tools/compute/wrapper.py
 rename warehouse/metrics_tools/{lib/factories => }/definition.py (90%)
 rename warehouse/metrics_tools/{lib/factories => factory}/__init__.py (61%)
 create mode 100644 warehouse/metrics_tools/factory/factory.py
 create mode 100644 warehouse/metrics_tools/factory/fixtures/metrics/active_days.sql
 create mode 100644 warehouse/metrics_tools/factory/fixtures/metrics/change_in_developers.sql
 create mode 100644 warehouse/metrics_tools/factory/fixtures/metrics/developer_activity_classification.sql
 create mode 100644 warehouse/metrics_tools/factory/fixtures/metrics/visits.sql
 create mode 100644 warehouse/metrics_tools/factory/gen_data.py
 rename warehouse/metrics_tools/{lib/factories => factory}/macros.py (60%)
 create mode 100644 warehouse/metrics_tools/factory/test_factory.py
 create mode 100644 warehouse/metrics_tools/intermediate.py
 create mode 100644 warehouse/metrics_tools/joiner/__init__.py
 create mode 100644 warehouse/metrics_tools/joiner/fixtures/basic/expected_artifact.sql
 create mode 100644 warehouse/metrics_tools/joiner/fixtures/basic/expected_project.sql
 create mode 100644 warehouse/metrics_tools/joiner/fixtures/basic/input.sql
 create mode 100644 warehouse/metrics_tools/joiner/test_joiner.py
 delete mode 100644 warehouse/metrics_tools/lib/__init__.py
 delete mode 100644 warehouse/metrics_tools/lib/factories/factory.py
 rename warehouse/metrics_tools/{lib => }/local/__init__.py (100%)
 rename warehouse/metrics_tools/{lib => }/local/utils.py (100%)
 create mode 100644 warehouse/metrics_tools/runner.py
 create mode 100644 warehouse/metrics_tools/test_intermediate.py
 create mode 100644 warehouse/metrics_tools/transformer/__init__.py
 create mode 100644 warehouse/metrics_tools/transformer/base.py
 create mode 100644 warehouse/metrics_tools/transformer/intermediate.py
 create mode 100644 warehouse/metrics_tools/transformer/qualify.py
 create mode 100644 warehouse/metrics_tools/transformer/transformer.py
 create mode 100644 warehouse/metrics_tools/utils/__init__.py
 create mode 100644 warehouse/metrics_tools/utils/dataframes.py
 create mode 100644 warehouse/metrics_tools/utils/glot.py
 create mode 100644 warehouse/metrics_tools/utils/testing.py

diff --git a/poetry.lock b/poetry.lock
index 58089eb57..e751518c5 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
[[package]] name = "agate" @@ -831,13 +831,13 @@ test = ["pytest"] [[package]] name = "croniter" -version = "3.0.3" +version = "5.0.1" description = "croniter provides iteration for datetime object with cron like format" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.6" files = [ - {file = "croniter-3.0.3-py2.py3-none-any.whl", hash = "sha256:b3bd11f270dc54ccd1f2397b813436015a86d30ffc5a7a9438eec1ed916f2101"}, - {file = "croniter-3.0.3.tar.gz", hash = "sha256:34117ec1741f10a7bd0ec3ad7d8f0eb8fa457a2feb9be32e6a2250e158957668"}, + {file = "croniter-5.0.1-py2.py3-none-any.whl", hash = "sha256:eb28439742291f6c10b181df1a5ecf421208b1fc62ef44501daec1780a0b09e9"}, + {file = "croniter-5.0.1.tar.gz", hash = "sha256:7d9b1ef25b10eece48fdf29d8ac52f9b6252abff983ac614ade4f3276294019e"}, ] [package.dependencies] @@ -1285,13 +1285,13 @@ pyarrow = ">=3.0.0" [[package]] name = "dbt-adapters" -version = "1.7.2" +version = "1.8.0" description = "The set of adapter protocols and base functionality that supports integration with dbt-core" optional = false python-versions = ">=3.9.0" files = [ - {file = "dbt_adapters-1.7.2-py3-none-any.whl", hash = "sha256:7c3df8a92bfdbe52e38f42ad07d0c383c68a42611546356c49e26e9422a395e1"}, - {file = "dbt_adapters-1.7.2.tar.gz", hash = "sha256:dc2c82fc36da27ab0d444584063d3aac66f5b65b50f90105bb9e3f4b04043155"}, + {file = "dbt_adapters-1.8.0-py3-none-any.whl", hash = "sha256:ced398d50bd4550c2dafb8956ead8a52e6444c049eaa6b213f8cbe896f80c115"}, + {file = "dbt_adapters-1.8.0.tar.gz", hash = "sha256:4b269c6574493701acf4fd185b29b05f8ed396bff9ba61ba74c80931ba80c7c5"}, ] [package.dependencies] @@ -1945,13 +1945,13 @@ files = [ [[package]] name = "google-api-core" -version = "2.21.0" +version = "2.22.0" description = "Google API client core library" optional = false python-versions = ">=3.7" files = [ - {file = "google_api_core-2.21.0-py3-none-any.whl", hash = "sha256:6869eacb2a37720380ba5898312af79a4d30b8bca1548fb4093e0697dc4bdf5d"}, - {file = "google_api_core-2.21.0.tar.gz", hash = "sha256:4a152fd11a9f774ea606388d423b68aa7e6d6a0ffe4c8266f74979613ec09f81"}, + {file = "google_api_core-2.22.0-py3-none-any.whl", hash = "sha256:a6652b6bd51303902494998626653671703c420f6f4c88cfd3f50ed723e9d021"}, + {file = "google_api_core-2.22.0.tar.gz", hash = "sha256:26f8d76b96477db42b55fd02a33aae4a42ec8b86b98b94969b7333a2c828bf35"}, ] [package.dependencies] @@ -2079,13 +2079,13 @@ tqdm = ["tqdm (>=4.7.4,<5.0.0dev)"] [[package]] name = "google-cloud-bigquery-datatransfer" -version = "3.16.0" +version = "3.17.0" description = "Google Cloud Bigquery Datatransfer API client library" optional = false python-versions = ">=3.7" files = [ - {file = "google_cloud_bigquery_datatransfer-3.16.0-py2.py3-none-any.whl", hash = "sha256:770617e25f8db9f806e1b86a34312c0201ff66ed0eb567235c0fc5b1c5c751b3"}, - {file = "google_cloud_bigquery_datatransfer-3.16.0.tar.gz", hash = "sha256:13261d3432c39883f2059f7360b0126a14b4814ac9f525e1763a2428a599e255"}, + {file = "google_cloud_bigquery_datatransfer-3.17.0-py2.py3-none-any.whl", hash = "sha256:64fe4011b6f562dc30e06c827119c909088a31aabc190924bb06bade1d3aef67"}, + {file = "google_cloud_bigquery_datatransfer-3.17.0.tar.gz", hash = "sha256:e41a332d9afec77673fa906134f1ca6242d7b881e670ae5a88c99e0d2d2de900"}, ] [package.dependencies] @@ -2136,13 +2136,13 @@ grpc = ["grpcio (>=1.38.0,<2.0dev)", "grpcio-status (>=1.38.0,<2.0.dev0)"] [[package]] name = "google-cloud-dataproc" -version = "5.14.0" +version = "5.15.0" description = "Google Cloud Dataproc API client library" 
optional = false python-versions = ">=3.7" files = [ - {file = "google_cloud_dataproc-5.14.0-py2.py3-none-any.whl", hash = "sha256:ae1a2e5f3b3094f367f578e3c9e0a81faf6854779fd9cbc4659082abf15f25f1"}, - {file = "google_cloud_dataproc-5.14.0.tar.gz", hash = "sha256:a1b0908f05b390e8289fc42c94960b617ea13f9adf3e836c82368a60c27d8852"}, + {file = "google_cloud_dataproc-5.15.0-py2.py3-none-any.whl", hash = "sha256:14dfcf327fa1c2ede3601fbbc1d559ace43682481aef42a182fb158af876c083"}, + {file = "google_cloud_dataproc-5.15.0.tar.gz", hash = "sha256:010e335368d0f47963643e323be03916d3e8556b772acbe50215fd54f156f91f"}, ] [package.dependencies] @@ -2154,13 +2154,13 @@ protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4 [[package]] name = "google-cloud-resource-manager" -version = "1.12.5" +version = "1.13.0" description = "Google Cloud Resource Manager API client library" optional = false python-versions = ">=3.7" files = [ - {file = "google_cloud_resource_manager-1.12.5-py2.py3-none-any.whl", hash = "sha256:2708a718b45c79464b7b21559c701b5c92e6b0b1ab2146d0a256277a623dc175"}, - {file = "google_cloud_resource_manager-1.12.5.tar.gz", hash = "sha256:b7af4254401ed4efa3aba3a929cb3ddb803fa6baf91a78485e45583597de5891"}, + {file = "google_cloud_resource_manager-1.13.0-py2.py3-none-any.whl", hash = "sha256:33beb4528c2b7aee7a97ed843710581a7b4a27f3dd1fa41a0bf3359b3d68853f"}, + {file = "google_cloud_resource_manager-1.13.0.tar.gz", hash = "sha256:ae4bf69443f14b37007d4d84150115b0942e8b01650fd7a1fc6ff4dc1760e5c4"}, ] [package.dependencies] @@ -2172,13 +2172,13 @@ protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4 [[package]] name = "google-cloud-secret-manager" -version = "2.20.2" +version = "2.21.0" description = "Google Cloud Secret Manager API client library" optional = false python-versions = ">=3.7" files = [ - {file = "google_cloud_secret_manager-2.20.2-py2.py3-none-any.whl", hash = "sha256:99b342ff722feef78aa5bad1c05c6be204f8fee01373a2eb6f05dba710b32879"}, - {file = "google_cloud_secret_manager-2.20.2.tar.gz", hash = "sha256:bbe24825e334f9e679e825e70d932118a7ff536e67c1ceb048da44111c87a45c"}, + {file = "google_cloud_secret_manager-2.21.0-py2.py3-none-any.whl", hash = "sha256:b7fed5c2f3be5e10d94053ea3a7c6a7c5813d38da39c678ef6c1137d6e25a310"}, + {file = "google_cloud_secret_manager-2.21.0.tar.gz", hash = "sha256:d1ae84ecf98cfc319c9a3f1012355cebd19317b662cc9dff1a2c36234580807b"}, ] [package.dependencies] @@ -2190,13 +2190,13 @@ protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4 [[package]] name = "google-cloud-service-management" -version = "1.9.0" +version = "1.10.0" description = "Google Cloud Service Management API client library" optional = false python-versions = ">=3.7" files = [ - {file = "google_cloud_service_management-1.9.0-py2.py3-none-any.whl", hash = "sha256:97aa5b91fa3fd97880eddf160f72f689d4832308946a97c33523315eb87f094b"}, - {file = "google_cloud_service_management-1.9.0.tar.gz", hash = "sha256:302f3935540c5835d75ee4b122bc6311a54ebc67d0297427c238f895dcfb731a"}, + {file = "google_cloud_service_management-1.10.0-py2.py3-none-any.whl", hash = "sha256:6ba1d5bbd6446b83ed92899c17329368a87a3002e2e92e8c4f38c1caad6c5e53"}, + {file = "google_cloud_service_management-1.10.0.tar.gz", hash = "sha256:628965fee1658e2886bf2bf5d886960c9240dfb51cecb0a6f5562d31a03428e0"}, ] [package.dependencies] @@ -2208,13 +2208,13 @@ protobuf = ">=3.20.2,<4.21.0 || >4.21.0,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4 [[package]] name = 
"google-cloud-service-usage" -version = "1.10.5" +version = "1.11.0" description = "Google Cloud Service Usage API client library" optional = false python-versions = ">=3.7" files = [ - {file = "google_cloud_service_usage-1.10.5-py2.py3-none-any.whl", hash = "sha256:73ac8c901c2f61516f1f1a357eb50a2736899cbd3584259dc08ada149c2c3672"}, - {file = "google_cloud_service_usage-1.10.5.tar.gz", hash = "sha256:0bbe9b4f0d82d5aa6eb61c30db3fae0fb21ee1c99250ecb9caad75275ed59aad"}, + {file = "google_cloud_service_usage-1.11.0-py2.py3-none-any.whl", hash = "sha256:a9c710c91222452d5879f906001e25fd80f8df42a2976cffb6c215e538bd08b1"}, + {file = "google_cloud_service_usage-1.11.0.tar.gz", hash = "sha256:9933a9694298b10ad655e59039818d5f37248ee79642213f3ec1607e932bcad7"}, ] [package.dependencies] @@ -2353,22 +2353,23 @@ websockets = ["websockets (>=10,<12)"] [[package]] name = "graphene" -version = "3.4" +version = "3.4.1" description = "GraphQL Framework for Python" optional = false python-versions = "*" files = [ - {file = "graphene-3.4-py2.py3-none-any.whl", hash = "sha256:28bf359b802cdb808130a5521135d4c88a262564598cfdc91628d2c172b99dce"}, - {file = "graphene-3.4.tar.gz", hash = "sha256:65e5ec84c5b7fb4fc41518acfbafb62ebb393d3982fbba00cd5393e431a80b97"}, + {file = "graphene-3.4.1-py2.py3-none-any.whl", hash = "sha256:ca98f853201293871cdc7f55faf2523a9bc077181fe0f4947db5a243e5c67083"}, + {file = "graphene-3.4.1.tar.gz", hash = "sha256:828a8d7b1bce450566a72cc8733716c20f3acfc659960de73dd38f46dc302040"}, ] [package.dependencies] graphql-core = ">=3.1,<3.3" graphql-relay = ">=3.1,<3.3" +python-dateutil = ">=2.7.0,<3" typing-extensions = ">=4.7.1,<5" [package.extras] -dev = ["coveralls (>=3.3,<5)", "pytest (>=8,<9)", "pytest-asyncio (>=0.16,<2)", "pytest-benchmark (>=4,<5)", "pytest-cov (>=5,<6)", "pytest-mock (>=3,<4)", "ruff (==0.5.0)"] +dev = ["coveralls (>=3.3,<5)", "mypy (>=1.10,<2)", "pytest (>=8,<9)", "pytest-asyncio (>=0.16,<2)", "pytest-benchmark (>=4,<5)", "pytest-cov (>=5,<6)", "pytest-mock (>=3,<4)", "ruff (==0.5.0)", "types-python-dateutil (>=2.8.1,<3)"] test = ["coveralls (>=3.3,<5)", "pytest (>=8,<9)", "pytest-asyncio (>=0.16,<2)", "pytest-benchmark (>=4,<5)", "pytest-cov (>=5,<6)", "pytest-mock (>=3,<4)"] [[package]] @@ -2500,70 +2501,70 @@ protobuf = ">=3.20.2,<4.21.1 || >4.21.1,<4.21.2 || >4.21.2,<4.21.3 || >4.21.3,<4 [[package]] name = "grpcio" -version = "1.67.0" +version = "1.67.1" description = "HTTP/2-based RPC framework" optional = false python-versions = ">=3.8" files = [ - {file = "grpcio-1.67.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:bd79929b3bb96b54df1296cd3bf4d2b770bd1df6c2bdf549b49bab286b925cdc"}, - {file = "grpcio-1.67.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:16724ffc956ea42967f5758c2f043faef43cb7e48a51948ab593570570d1e68b"}, - {file = "grpcio-1.67.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:2b7183c80b602b0ad816315d66f2fb7887614ead950416d60913a9a71c12560d"}, - {file = "grpcio-1.67.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:efe32b45dd6d118f5ea2e5deaed417d8a14976325c93812dd831908522b402c9"}, - {file = "grpcio-1.67.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe89295219b9c9e47780a0f1c75ca44211e706d1c598242249fe717af3385ec8"}, - {file = "grpcio-1.67.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:aa8d025fae1595a207b4e47c2e087cb88d47008494db258ac561c00877d4c8f8"}, - {file = "grpcio-1.67.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = 
"sha256:f95e15db43e75a534420e04822df91f645664bf4ad21dfaad7d51773c80e6bb4"}, - {file = "grpcio-1.67.0-cp310-cp310-win32.whl", hash = "sha256:a6b9a5c18863fd4b6624a42e2712103fb0f57799a3b29651c0e5b8119a519d65"}, - {file = "grpcio-1.67.0-cp310-cp310-win_amd64.whl", hash = "sha256:b6eb68493a05d38b426604e1dc93bfc0137c4157f7ab4fac5771fd9a104bbaa6"}, - {file = "grpcio-1.67.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:e91d154689639932305b6ea6f45c6e46bb51ecc8ea77c10ef25aa77f75443ad4"}, - {file = "grpcio-1.67.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:cb204a742997277da678611a809a8409657b1398aaeebf73b3d9563b7d154c13"}, - {file = "grpcio-1.67.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:ae6de510f670137e755eb2a74b04d1041e7210af2444103c8c95f193340d17ee"}, - {file = "grpcio-1.67.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:74b900566bdf68241118f2918d312d3bf554b2ce0b12b90178091ea7d0a17b3d"}, - {file = "grpcio-1.67.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a4e95e43447a02aa603abcc6b5e727d093d161a869c83b073f50b9390ecf0fa8"}, - {file = "grpcio-1.67.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:0bb94e66cd8f0baf29bd3184b6aa09aeb1a660f9ec3d85da615c5003154bc2bf"}, - {file = "grpcio-1.67.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:82e5bd4b67b17c8c597273663794a6a46a45e44165b960517fe6d8a2f7f16d23"}, - {file = "grpcio-1.67.0-cp311-cp311-win32.whl", hash = "sha256:7fc1d2b9fd549264ae585026b266ac2db53735510a207381be509c315b4af4e8"}, - {file = "grpcio-1.67.0-cp311-cp311-win_amd64.whl", hash = "sha256:ac11ecb34a86b831239cc38245403a8de25037b448464f95c3315819e7519772"}, - {file = "grpcio-1.67.0-cp312-cp312-linux_armv7l.whl", hash = "sha256:227316b5631260e0bef8a3ce04fa7db4cc81756fea1258b007950b6efc90c05d"}, - {file = "grpcio-1.67.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:d90cfdafcf4b45a7a076e3e2a58e7bc3d59c698c4f6470b0bb13a4d869cf2273"}, - {file = "grpcio-1.67.0-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:77196216d5dd6f99af1c51e235af2dd339159f657280e65ce7e12c1a8feffd1d"}, - {file = "grpcio-1.67.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:15c05a26a0f7047f720da41dc49406b395c1470eef44ff7e2c506a47ac2c0591"}, - {file = "grpcio-1.67.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3840994689cc8cbb73d60485c594424ad8adb56c71a30d8948d6453083624b52"}, - {file = "grpcio-1.67.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:5a1e03c3102b6451028d5dc9f8591131d6ab3c8a0e023d94c28cb930ed4b5f81"}, - {file = "grpcio-1.67.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:682968427a63d898759474e3b3178d42546e878fdce034fd7474ef75143b64e3"}, - {file = "grpcio-1.67.0-cp312-cp312-win32.whl", hash = "sha256:d01793653248f49cf47e5695e0a79805b1d9d4eacef85b310118ba1dfcd1b955"}, - {file = "grpcio-1.67.0-cp312-cp312-win_amd64.whl", hash = "sha256:985b2686f786f3e20326c4367eebdaed3e7aa65848260ff0c6644f817042cb15"}, - {file = "grpcio-1.67.0-cp313-cp313-linux_armv7l.whl", hash = "sha256:8c9a35b8bc50db35ab8e3e02a4f2a35cfba46c8705c3911c34ce343bd777813a"}, - {file = "grpcio-1.67.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:42199e704095b62688998c2d84c89e59a26a7d5d32eed86d43dc90e7a3bd04aa"}, - {file = "grpcio-1.67.0-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:c4c425f440fb81f8d0237c07b9322fc0fb6ee2b29fbef5f62a322ff8fcce240d"}, - {file = "grpcio-1.67.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:323741b6699cd2b04a71cb38f502db98f90532e8a40cb675393d248126a268af"}, - {file = "grpcio-1.67.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:662c8e105c5e5cee0317d500eb186ed7a93229586e431c1bf0c9236c2407352c"}, - {file = "grpcio-1.67.0-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:f6bd2ab135c64a4d1e9e44679a616c9bc944547357c830fafea5c3caa3de5153"}, - {file = "grpcio-1.67.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:2f55c1e0e2ae9bdd23b3c63459ee4c06d223b68aeb1961d83c48fb63dc29bc03"}, - {file = "grpcio-1.67.0-cp313-cp313-win32.whl", hash = "sha256:fd6bc27861e460fe28e94226e3673d46e294ca4673d46b224428d197c5935e69"}, - {file = "grpcio-1.67.0-cp313-cp313-win_amd64.whl", hash = "sha256:cf51d28063338608cd8d3cd64677e922134837902b70ce00dad7f116e3998210"}, - {file = "grpcio-1.67.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:7f200aca719c1c5dc72ab68be3479b9dafccdf03df530d137632c534bb6f1ee3"}, - {file = "grpcio-1.67.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:0892dd200ece4822d72dd0952f7112c542a487fc48fe77568deaaa399c1e717d"}, - {file = "grpcio-1.67.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:f4d613fbf868b2e2444f490d18af472ccb47660ea3df52f068c9c8801e1f3e85"}, - {file = "grpcio-1.67.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0c69bf11894cad9da00047f46584d5758d6ebc9b5950c0dc96fec7e0bce5cde9"}, - {file = "grpcio-1.67.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9bca3ca0c5e74dea44bf57d27e15a3a3996ce7e5780d61b7c72386356d231db"}, - {file = "grpcio-1.67.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:014dfc020e28a0d9be7e93a91f85ff9f4a87158b7df9952fe23cc42d29d31e1e"}, - {file = "grpcio-1.67.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:d4ea4509d42c6797539e9ec7496c15473177ce9abc89bc5c71e7abe50fc25737"}, - {file = "grpcio-1.67.0-cp38-cp38-win32.whl", hash = "sha256:9d75641a2fca9ae1ae86454fd25d4c298ea8cc195dbc962852234d54a07060ad"}, - {file = "grpcio-1.67.0-cp38-cp38-win_amd64.whl", hash = "sha256:cff8e54d6a463883cda2fab94d2062aad2f5edd7f06ae3ed030f2a74756db365"}, - {file = "grpcio-1.67.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:62492bd534979e6d7127b8a6b29093161a742dee3875873e01964049d5250a74"}, - {file = "grpcio-1.67.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:eef1dce9d1a46119fd09f9a992cf6ab9d9178b696382439446ca5f399d7b96fe"}, - {file = "grpcio-1.67.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:f623c57a5321461c84498a99dddf9d13dac0e40ee056d884d6ec4ebcab647a78"}, - {file = "grpcio-1.67.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:54d16383044e681f8beb50f905249e4e7261dd169d4aaf6e52eab67b01cbbbe2"}, - {file = "grpcio-1.67.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b2a44e572fb762c668e4812156b81835f7aba8a721b027e2d4bb29fb50ff4d33"}, - {file = "grpcio-1.67.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:391df8b0faac84d42f5b8dfc65f5152c48ed914e13c522fd05f2aca211f8bfad"}, - {file = "grpcio-1.67.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:cfd9306511fdfc623a1ba1dc3bc07fbd24e6cfbe3c28b4d1e05177baa2f99617"}, - {file = "grpcio-1.67.0-cp39-cp39-win32.whl", hash = "sha256:30d47dbacfd20cbd0c8be9bfa52fdb833b395d4ec32fe5cff7220afc05d08571"}, - {file = "grpcio-1.67.0-cp39-cp39-win_amd64.whl", hash = "sha256:f55f077685f61f0fbd06ea355142b71e47e4a26d2d678b3ba27248abfe67163a"}, - {file = "grpcio-1.67.0.tar.gz", hash = "sha256:e090b2553e0da1c875449c8e75073dd4415dd71c9bde6a406240fdf4c0ee467c"}, -] - 
-[package.extras] -protobuf = ["grpcio-tools (>=1.67.0)"] + {file = "grpcio-1.67.1-cp310-cp310-linux_armv7l.whl", hash = "sha256:8b0341d66a57f8a3119b77ab32207072be60c9bf79760fa609c5609f2deb1f3f"}, + {file = "grpcio-1.67.1-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:f5a27dddefe0e2357d3e617b9079b4bfdc91341a91565111a21ed6ebbc51b22d"}, + {file = "grpcio-1.67.1-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:43112046864317498a33bdc4797ae6a268c36345a910de9b9c17159d8346602f"}, + {file = "grpcio-1.67.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c9b929f13677b10f63124c1a410994a401cdd85214ad83ab67cc077fc7e480f0"}, + {file = "grpcio-1.67.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e7d1797a8a3845437d327145959a2c0c47c05947c9eef5ff1a4c80e499dcc6fa"}, + {file = "grpcio-1.67.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:0489063974d1452436139501bf6b180f63d4977223ee87488fe36858c5725292"}, + {file = "grpcio-1.67.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9fd042de4a82e3e7aca44008ee2fb5da01b3e5adb316348c21980f7f58adc311"}, + {file = "grpcio-1.67.1-cp310-cp310-win32.whl", hash = "sha256:638354e698fd0c6c76b04540a850bf1db27b4d2515a19fcd5cf645c48d3eb1ed"}, + {file = "grpcio-1.67.1-cp310-cp310-win_amd64.whl", hash = "sha256:608d87d1bdabf9e2868b12338cd38a79969eaf920c89d698ead08f48de9c0f9e"}, + {file = "grpcio-1.67.1-cp311-cp311-linux_armv7l.whl", hash = "sha256:7818c0454027ae3384235a65210bbf5464bd715450e30a3d40385453a85a70cb"}, + {file = "grpcio-1.67.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:ea33986b70f83844cd00814cee4451055cd8cab36f00ac64a31f5bb09b31919e"}, + {file = "grpcio-1.67.1-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:c7a01337407dd89005527623a4a72c5c8e2894d22bead0895306b23c6695698f"}, + {file = "grpcio-1.67.1-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:80b866f73224b0634f4312a4674c1be21b2b4afa73cb20953cbbb73a6b36c3cc"}, + {file = "grpcio-1.67.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f9fff78ba10d4250bfc07a01bd6254a6d87dc67f9627adece85c0b2ed754fa96"}, + {file = "grpcio-1.67.1-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:8a23cbcc5bb11ea7dc6163078be36c065db68d915c24f5faa4f872c573bb400f"}, + {file = "grpcio-1.67.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1a65b503d008f066e994f34f456e0647e5ceb34cfcec5ad180b1b44020ad4970"}, + {file = "grpcio-1.67.1-cp311-cp311-win32.whl", hash = "sha256:e29ca27bec8e163dca0c98084040edec3bc49afd10f18b412f483cc68c712744"}, + {file = "grpcio-1.67.1-cp311-cp311-win_amd64.whl", hash = "sha256:786a5b18544622bfb1e25cc08402bd44ea83edfb04b93798d85dca4d1a0b5be5"}, + {file = "grpcio-1.67.1-cp312-cp312-linux_armv7l.whl", hash = "sha256:267d1745894200e4c604958da5f856da6293f063327cb049a51fe67348e4f953"}, + {file = "grpcio-1.67.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:85f69fdc1d28ce7cff8de3f9c67db2b0ca9ba4449644488c1e0303c146135ddb"}, + {file = "grpcio-1.67.1-cp312-cp312-manylinux_2_17_aarch64.whl", hash = "sha256:f26b0b547eb8d00e195274cdfc63ce64c8fc2d3e2d00b12bf468ece41a0423a0"}, + {file = "grpcio-1.67.1-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4422581cdc628f77302270ff839a44f4c24fdc57887dc2a45b7e53d8fc2376af"}, + {file = "grpcio-1.67.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d7616d2ded471231c701489190379e0c311ee0a6c756f3c03e6a62b95a7146e"}, + {file = "grpcio-1.67.1-cp312-cp312-musllinux_1_1_i686.whl", 
hash = "sha256:8a00efecde9d6fcc3ab00c13f816313c040a28450e5e25739c24f432fc6d3c75"}, + {file = "grpcio-1.67.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:699e964923b70f3101393710793289e42845791ea07565654ada0969522d0a38"}, + {file = "grpcio-1.67.1-cp312-cp312-win32.whl", hash = "sha256:4e7b904484a634a0fff132958dabdb10d63e0927398273917da3ee103e8d1f78"}, + {file = "grpcio-1.67.1-cp312-cp312-win_amd64.whl", hash = "sha256:5721e66a594a6c4204458004852719b38f3d5522082be9061d6510b455c90afc"}, + {file = "grpcio-1.67.1-cp313-cp313-linux_armv7l.whl", hash = "sha256:aa0162e56fd10a5547fac8774c4899fc3e18c1aa4a4759d0ce2cd00d3696ea6b"}, + {file = "grpcio-1.67.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:beee96c8c0b1a75d556fe57b92b58b4347c77a65781ee2ac749d550f2a365dc1"}, + {file = "grpcio-1.67.1-cp313-cp313-manylinux_2_17_aarch64.whl", hash = "sha256:a93deda571a1bf94ec1f6fcda2872dad3ae538700d94dc283c672a3b508ba3af"}, + {file = "grpcio-1.67.1-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0e6f255980afef598a9e64a24efce87b625e3e3c80a45162d111a461a9f92955"}, + {file = "grpcio-1.67.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e838cad2176ebd5d4a8bb03955138d6589ce9e2ce5d51c3ada34396dbd2dba8"}, + {file = "grpcio-1.67.1-cp313-cp313-musllinux_1_1_i686.whl", hash = "sha256:a6703916c43b1d468d0756c8077b12017a9fcb6a1ef13faf49e67d20d7ebda62"}, + {file = "grpcio-1.67.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:917e8d8994eed1d86b907ba2a61b9f0aef27a2155bca6cbb322430fc7135b7bb"}, + {file = "grpcio-1.67.1-cp313-cp313-win32.whl", hash = "sha256:e279330bef1744040db8fc432becc8a727b84f456ab62b744d3fdb83f327e121"}, + {file = "grpcio-1.67.1-cp313-cp313-win_amd64.whl", hash = "sha256:fa0c739ad8b1996bd24823950e3cb5152ae91fca1c09cc791190bf1627ffefba"}, + {file = "grpcio-1.67.1-cp38-cp38-linux_armv7l.whl", hash = "sha256:178f5db771c4f9a9facb2ab37a434c46cb9be1a75e820f187ee3d1e7805c4f65"}, + {file = "grpcio-1.67.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:0f3e49c738396e93b7ba9016e153eb09e0778e776df6090c1b8c91877cc1c426"}, + {file = "grpcio-1.67.1-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:24e8a26dbfc5274d7474c27759b54486b8de23c709d76695237515bc8b5baeab"}, + {file = "grpcio-1.67.1-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3b6c16489326d79ead41689c4b84bc40d522c9a7617219f4ad94bc7f448c5085"}, + {file = "grpcio-1.67.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:60e6a4dcf5af7bbc36fd9f81c9f372e8ae580870a9e4b6eafe948cd334b81cf3"}, + {file = "grpcio-1.67.1-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:95b5f2b857856ed78d72da93cd7d09b6db8ef30102e5e7fe0961fe4d9f7d48e8"}, + {file = "grpcio-1.67.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:b49359977c6ec9f5d0573ea4e0071ad278ef905aa74e420acc73fd28ce39e9ce"}, + {file = "grpcio-1.67.1-cp38-cp38-win32.whl", hash = "sha256:f5b76ff64aaac53fede0cc93abf57894ab2a7362986ba22243d06218b93efe46"}, + {file = "grpcio-1.67.1-cp38-cp38-win_amd64.whl", hash = "sha256:804c6457c3cd3ec04fe6006c739579b8d35c86ae3298ffca8de57b493524b771"}, + {file = "grpcio-1.67.1-cp39-cp39-linux_armv7l.whl", hash = "sha256:a25bdea92b13ff4d7790962190bf6bf5c4639876e01c0f3dda70fc2769616335"}, + {file = "grpcio-1.67.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:cdc491ae35a13535fd9196acb5afe1af37c8237df2e54427be3eecda3653127e"}, + {file = "grpcio-1.67.1-cp39-cp39-manylinux_2_17_aarch64.whl", hash = 
"sha256:85f862069b86a305497e74d0dc43c02de3d1d184fc2c180993aa8aa86fbd19b8"}, + {file = "grpcio-1.67.1-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ec74ef02010186185de82cc594058a3ccd8d86821842bbac9873fd4a2cf8be8d"}, + {file = "grpcio-1.67.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:01f616a964e540638af5130469451cf580ba8c7329f45ca998ab66e0c7dcdb04"}, + {file = "grpcio-1.67.1-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:299b3d8c4f790c6bcca485f9963b4846dd92cf6f1b65d3697145d005c80f9fe8"}, + {file = "grpcio-1.67.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:60336bff760fbb47d7e86165408126f1dded184448e9a4c892189eb7c9d3f90f"}, + {file = "grpcio-1.67.1-cp39-cp39-win32.whl", hash = "sha256:5ed601c4c6008429e3d247ddb367fe8c7259c355757448d7c1ef7bd4a6739e8e"}, + {file = "grpcio-1.67.1-cp39-cp39-win_amd64.whl", hash = "sha256:5db70d32d6703b89912af16d6d45d78406374a8b8ef0d28140351dd0ec610e98"}, + {file = "grpcio-1.67.1.tar.gz", hash = "sha256:3dc2ed4cabea4dc14d5e708c2b426205956077cc5de419b4d4079315017e9732"}, +] + +[package.extras] +protobuf = ["grpcio-tools (>=1.67.1)"] [[package]] name = "grpcio-health-checking" @@ -2864,13 +2865,13 @@ files = [ [[package]] name = "ipython" -version = "8.28.0" +version = "8.29.0" description = "IPython: Productive Interactive Computing" optional = false python-versions = ">=3.10" files = [ - {file = "ipython-8.28.0-py3-none-any.whl", hash = "sha256:530ef1e7bb693724d3cdc37287c80b07ad9b25986c007a53aa1857272dac3f35"}, - {file = "ipython-8.28.0.tar.gz", hash = "sha256:0d0d15ca1e01faeb868ef56bc7ee5a0de5bd66885735682e8a322ae289a13d1a"}, + {file = "ipython-8.29.0-py3-none-any.whl", hash = "sha256:0188a1bd83267192123ccea7f4a8ed0a78910535dbaa3f37671dca76ebd429c8"}, + {file = "ipython-8.29.0.tar.gz", hash = "sha256:40b60e15b22591450eef73e40a027cf77bd652e757523eebc5bd7c7c498290eb"}, ] [package.dependencies] @@ -3477,6 +3478,98 @@ files = [ requests = ">=2.2.1,<3.0" six = ">=1.9.0,<2.0" +[[package]] +name = "mmh3" +version = "4.1.0" +description = "Python extension for MurmurHash (MurmurHash3), a set of fast and robust hash functions." 
+optional = false +python-versions = "*" +files = [ + {file = "mmh3-4.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:be5ac76a8b0cd8095784e51e4c1c9c318c19edcd1709a06eb14979c8d850c31a"}, + {file = "mmh3-4.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:98a49121afdfab67cd80e912b36404139d7deceb6773a83620137aaa0da5714c"}, + {file = "mmh3-4.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:5259ac0535874366e7d1a5423ef746e0d36a9e3c14509ce6511614bdc5a7ef5b"}, + {file = "mmh3-4.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5950827ca0453a2be357696da509ab39646044e3fa15cad364eb65d78797437"}, + {file = "mmh3-4.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1dd0f652ae99585b9dd26de458e5f08571522f0402155809fd1dc8852a613a39"}, + {file = "mmh3-4.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:99d25548070942fab1e4a6f04d1626d67e66d0b81ed6571ecfca511f3edf07e6"}, + {file = "mmh3-4.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:53db8d9bad3cb66c8f35cbc894f336273f63489ce4ac416634932e3cbe79eb5b"}, + {file = "mmh3-4.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:75da0f615eb55295a437264cc0b736753f830b09d102aa4c2a7d719bc445ec05"}, + {file = "mmh3-4.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b926b07fd678ea84b3a2afc1fa22ce50aeb627839c44382f3d0291e945621e1a"}, + {file = "mmh3-4.1.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:c5b053334f9b0af8559d6da9dc72cef0a65b325ebb3e630c680012323c950bb6"}, + {file = "mmh3-4.1.0-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:5bf33dc43cd6de2cb86e0aa73a1cc6530f557854bbbe5d59f41ef6de2e353d7b"}, + {file = "mmh3-4.1.0-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:fa7eacd2b830727ba3dd65a365bed8a5c992ecd0c8348cf39a05cc77d22f4970"}, + {file = "mmh3-4.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:42dfd6742b9e3eec599f85270617debfa0bbb913c545bb980c8a4fa7b2d047da"}, + {file = "mmh3-4.1.0-cp310-cp310-win32.whl", hash = "sha256:2974ad343f0d39dcc88e93ee6afa96cedc35a9883bc067febd7ff736e207fa47"}, + {file = "mmh3-4.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:74699a8984ded645c1a24d6078351a056f5a5f1fe5838870412a68ac5e28d865"}, + {file = "mmh3-4.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:f0dc874cedc23d46fc488a987faa6ad08ffa79e44fb08e3cd4d4cf2877c00a00"}, + {file = "mmh3-4.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3280a463855b0eae64b681cd5b9ddd9464b73f81151e87bb7c91a811d25619e6"}, + {file = "mmh3-4.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:97ac57c6c3301769e757d444fa7c973ceb002cb66534b39cbab5e38de61cd896"}, + {file = "mmh3-4.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a7b6502cdb4dbd880244818ab363c8770a48cdccecf6d729ade0241b736b5ec0"}, + {file = "mmh3-4.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:52ba2da04671a9621580ddabf72f06f0e72c1c9c3b7b608849b58b11080d8f14"}, + {file = "mmh3-4.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5a5fef4c4ecc782e6e43fbeab09cff1bac82c998a1773d3a5ee6a3605cde343e"}, + {file = "mmh3-4.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5135358a7e00991f73b88cdc8eda5203bf9de22120d10a834c5761dbeb07dd13"}, + {file = "mmh3-4.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:cff9ae76a54f7c6fe0167c9c4028c12c1f6de52d68a31d11b6790bb2ae685560"}, + {file = "mmh3-4.1.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f6f02576a4d106d7830ca90278868bf0983554dd69183b7bbe09f2fcd51cf54f"}, + {file = "mmh3-4.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:073d57425a23721730d3ff5485e2da489dd3c90b04e86243dd7211f889898106"}, + {file = "mmh3-4.1.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:71e32ddec7f573a1a0feb8d2cf2af474c50ec21e7a8263026e8d3b4b629805db"}, + {file = "mmh3-4.1.0-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:7cbb20b29d57e76a58b40fd8b13a9130db495a12d678d651b459bf61c0714cea"}, + {file = "mmh3-4.1.0-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:a42ad267e131d7847076bb7e31050f6c4378cd38e8f1bf7a0edd32f30224d5c9"}, + {file = "mmh3-4.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:4a013979fc9390abadc445ea2527426a0e7a4495c19b74589204f9b71bcaafeb"}, + {file = "mmh3-4.1.0-cp311-cp311-win32.whl", hash = "sha256:1d3b1cdad7c71b7b88966301789a478af142bddcb3a2bee563f7a7d40519a00f"}, + {file = "mmh3-4.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:0dc6dc32eb03727467da8e17deffe004fbb65e8b5ee2b502d36250d7a3f4e2ec"}, + {file = "mmh3-4.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:9ae3a5c1b32dda121c7dc26f9597ef7b01b4c56a98319a7fe86c35b8bc459ae6"}, + {file = "mmh3-4.1.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0033d60c7939168ef65ddc396611077a7268bde024f2c23bdc283a19123f9e9c"}, + {file = "mmh3-4.1.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d6af3e2287644b2b08b5924ed3a88c97b87b44ad08e79ca9f93d3470a54a41c5"}, + {file = "mmh3-4.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d82eb4defa245e02bb0b0dc4f1e7ee284f8d212633389c91f7fba99ba993f0a2"}, + {file = "mmh3-4.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ba245e94b8d54765e14c2d7b6214e832557e7856d5183bc522e17884cab2f45d"}, + {file = "mmh3-4.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb04e2feeabaad6231e89cd43b3d01a4403579aa792c9ab6fdeef45cc58d4ec0"}, + {file = "mmh3-4.1.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1e3b1a27def545ce11e36158ba5d5390cdbc300cfe456a942cc89d649cf7e3b2"}, + {file = "mmh3-4.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ce0ab79ff736d7044e5e9b3bfe73958a55f79a4ae672e6213e92492ad5e734d5"}, + {file = "mmh3-4.1.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b02268be6e0a8eeb8a924d7db85f28e47344f35c438c1e149878bb1c47b1cd3"}, + {file = "mmh3-4.1.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:deb887f5fcdaf57cf646b1e062d56b06ef2f23421c80885fce18b37143cba828"}, + {file = "mmh3-4.1.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:99dd564e9e2b512eb117bd0cbf0f79a50c45d961c2a02402787d581cec5448d5"}, + {file = "mmh3-4.1.0-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:08373082dfaa38fe97aa78753d1efd21a1969e51079056ff552e687764eafdfe"}, + {file = "mmh3-4.1.0-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:54b9c6a2ea571b714e4fe28d3e4e2db37abfd03c787a58074ea21ee9a8fd1740"}, + {file = "mmh3-4.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a7b1edf24c69e3513f879722b97ca85e52f9032f24a52284746877f6a7304086"}, + {file = "mmh3-4.1.0-cp312-cp312-win32.whl", hash = 
"sha256:411da64b951f635e1e2284b71d81a5a83580cea24994b328f8910d40bed67276"}, + {file = "mmh3-4.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:bebc3ecb6ba18292e3d40c8712482b4477abd6981c2ebf0e60869bd90f8ac3a9"}, + {file = "mmh3-4.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:168473dd608ade6a8d2ba069600b35199a9af837d96177d3088ca91f2b3798e3"}, + {file = "mmh3-4.1.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:372f4b7e1dcde175507640679a2a8790185bb71f3640fc28a4690f73da986a3b"}, + {file = "mmh3-4.1.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:438584b97f6fe13e944faf590c90fc127682b57ae969f73334040d9fa1c7ffa5"}, + {file = "mmh3-4.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6e27931b232fc676675fac8641c6ec6b596daa64d82170e8597f5a5b8bdcd3b6"}, + {file = "mmh3-4.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:571a92bad859d7b0330e47cfd1850b76c39b615a8d8e7aa5853c1f971fd0c4b1"}, + {file = "mmh3-4.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4a69d6afe3190fa08f9e3a58e5145549f71f1f3fff27bd0800313426929c7068"}, + {file = "mmh3-4.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:afb127be0be946b7630220908dbea0cee0d9d3c583fa9114a07156f98566dc28"}, + {file = "mmh3-4.1.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:940d86522f36348ef1a494cbf7248ab3f4a1638b84b59e6c9e90408bd11ad729"}, + {file = "mmh3-4.1.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b3dcccc4935686619a8e3d1f7b6e97e3bd89a4a796247930ee97d35ea1a39341"}, + {file = "mmh3-4.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:01bb9b90d61854dfc2407c5e5192bfb47222d74f29d140cb2dd2a69f2353f7cc"}, + {file = "mmh3-4.1.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:bcb1b8b951a2c0b0fb8a5426c62a22557e2ffc52539e0a7cc46eb667b5d606a9"}, + {file = "mmh3-4.1.0-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:6477a05d5e5ab3168e82e8b106e316210ac954134f46ec529356607900aea82a"}, + {file = "mmh3-4.1.0-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:da5892287e5bea6977364b15712a2573c16d134bc5fdcdd4cf460006cf849278"}, + {file = "mmh3-4.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:99180d7fd2327a6fffbaff270f760576839dc6ee66d045fa3a450f3490fda7f5"}, + {file = "mmh3-4.1.0-cp38-cp38-win32.whl", hash = "sha256:9b0d4f3949913a9f9a8fb1bb4cc6ecd52879730aab5ff8c5a3d8f5b593594b73"}, + {file = "mmh3-4.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:598c352da1d945108aee0c3c3cfdd0e9b3edef74108f53b49d481d3990402169"}, + {file = "mmh3-4.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:475d6d1445dd080f18f0f766277e1237fa2914e5fe3307a3b2a3044f30892103"}, + {file = "mmh3-4.1.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5ca07c41e6a2880991431ac717c2a049056fff497651a76e26fc22224e8b5732"}, + {file = "mmh3-4.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0ebe052fef4bbe30c0548d12ee46d09f1b69035ca5208a7075e55adfe091be44"}, + {file = "mmh3-4.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eaefd42e85afb70f2b855a011f7b4d8a3c7e19c3f2681fa13118e4d8627378c5"}, + {file = "mmh3-4.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ac0ae43caae5a47afe1b63a1ae3f0986dde54b5fb2d6c29786adbfb8edc9edfb"}, + {file = "mmh3-4.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6218666f74c8c013c221e7f5f8a693ac9cf68e5ac9a03f2373b32d77c48904de"}, + {file = 
"mmh3-4.1.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ac59294a536ba447b5037f62d8367d7d93b696f80671c2c45645fa9f1109413c"}, + {file = "mmh3-4.1.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:086844830fcd1e5c84fec7017ea1ee8491487cfc877847d96f86f68881569d2e"}, + {file = "mmh3-4.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:e42b38fad664f56f77f6fbca22d08450f2464baa68acdbf24841bf900eb98e87"}, + {file = "mmh3-4.1.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:d08b790a63a9a1cde3b5d7d733ed97d4eb884bfbc92f075a091652d6bfd7709a"}, + {file = "mmh3-4.1.0-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:73ea4cc55e8aea28c86799ecacebca09e5f86500414870a8abaedfcbaf74d288"}, + {file = "mmh3-4.1.0-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:f90938ff137130e47bcec8dc1f4ceb02f10178c766e2ef58a9f657ff1f62d124"}, + {file = "mmh3-4.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:aa1f13e94b8631c8cd53259250556edcf1de71738936b60febba95750d9632bd"}, + {file = "mmh3-4.1.0-cp39-cp39-win32.whl", hash = "sha256:a3b680b471c181490cf82da2142029edb4298e1bdfcb67c76922dedef789868d"}, + {file = "mmh3-4.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:fefef92e9c544a8dbc08f77a8d1b6d48006a750c4375bbcd5ff8199d761e263b"}, + {file = "mmh3-4.1.0-cp39-cp39-win_arm64.whl", hash = "sha256:8e2c1f6a2b41723a4f82bd5a762a777836d29d664fc0095f17910bea0adfd4a6"}, + {file = "mmh3-4.1.0.tar.gz", hash = "sha256:a1cf25348b9acd229dda464a094d6170f47d2850a1fcb762a3b6172d2ce6ca4a"}, +] + +[package.extras] +test = ["mypy (>=1.0)", "pytest (>=7.0.0)"] + [[package]] name = "more-itertools" version = "10.5.0" @@ -4319,17 +4412,17 @@ files = [ [[package]] name = "polars" -version = "1.11.0" +version = "1.12.0" description = "Blazingly fast DataFrame library" optional = false python-versions = ">=3.9" files = [ - {file = "polars-1.11.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:d20152fc29b83ffa4ca7d92b056866b1755dda346a3841106d9b361ccc96d94b"}, - {file = "polars-1.11.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:fd48e8f607ae42f49abf4491e67fb1ad7d85157cb0a45a164fc4d1760d67e8ef"}, - {file = "polars-1.11.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1293f826e5469626d2a4da5e66afb0b46c6f8cb43d16e301d99aa5b911518c34"}, - {file = "polars-1.11.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:0c41c79fc7e2159a0d8fb69a3d0d26c402846d10fe6ff772b2591766e39dfac4"}, - {file = "polars-1.11.0-cp39-abi3-win_amd64.whl", hash = "sha256:a361d50ab5b0a6387bfe07a8a755bad7e61ba3d03381e4d1e343f49f6f0eb893"}, - {file = "polars-1.11.0.tar.gz", hash = "sha256:4fbdd772b5f4538eb9f5ae4f3256290dba1f6c6b9d5226aed918801ed51089f4"}, + {file = "polars-1.12.0-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:8f3c4e4e423c373dda07b4c8a7ff12aa02094b524767d0ca306b1eba67f2d99e"}, + {file = "polars-1.12.0-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:aa6f9862f0cec6353243920d9b8d858c21ec8f25f91af203dea6ff91980e140d"}, + {file = "polars-1.12.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:afb03647b5160737d2119532ee8ffe825de1d19d87f81bbbb005131786f7d59b"}, + {file = "polars-1.12.0-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:ea96aba5eb3dab8f0e6abf05ab3fc2136b329261860ef8661d20f5456a2d78e0"}, + {file = "polars-1.12.0-cp39-abi3-win_amd64.whl", hash = "sha256:a228a4b320a36d03a9ec9dfe7241b6d80a2f119b2dceb1da953166655e4cf43c"}, + {file = "polars-1.12.0.tar.gz", hash = 
"sha256:fb5c92de1a8f7d0a3f923fe48ea89eb518bdf55315ae917012350fa072bd64f4"}, ] [package.dependencies] @@ -4872,6 +4965,85 @@ files = [ [package.extras] windows-terminal = ["colorama (>=0.4.6)"] +[[package]] +name = "pyiceberg" +version = "0.7.1" +description = "Apache Iceberg is an open table format for huge analytic datasets" +optional = false +python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8" +files = [ + {file = "pyiceberg-0.7.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:9e0cc837d41e100df81f1f5e580a89668aade694d8c616941d6e11c3a27e49cb"}, + {file = "pyiceberg-0.7.1-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:71c053c2d484505d1beabd7d5167fe2e835ca865f52ad91ef4852f0d91fa4a25"}, + {file = "pyiceberg-0.7.1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:0549ab1843bc07037a7d212c2db527ff1755f5d8f80420907952b5b080eb3663"}, + {file = "pyiceberg-0.7.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec4a8000f0bb6ce6ec47f3368ca99f3191e9105662eeef7be2fbb493363cba96"}, + {file = "pyiceberg-0.7.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0ef6636d3cf370b796529f9a8dbd84e892a2151f0310a8015b9a1e702647ad90"}, + {file = "pyiceberg-0.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:9b49320f3e9624075879a4ddb4fa5ddff7d4a03f6561ad6fd73d514c63095367"}, + {file = "pyiceberg-0.7.1-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:27e9b4033691411ef7c49d93df7b3b7f3ed85fe8019cbf0dab5a5ba888b27f34"}, + {file = "pyiceberg-0.7.1-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:7262ba4f95e05a1421567e24c0db57288dc59974c94676aba34afef121544694"}, + {file = "pyiceberg-0.7.1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3eb1fc1d47085b16973999c2111d252fab2a394625c0f25da6515b8c3233c853"}, + {file = "pyiceberg-0.7.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1856c5d64197c9335817b8cf7081e490b601385623e5178cb094ee645d4fb24c"}, + {file = "pyiceberg-0.7.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b6b64006c361220ce103b5bb2f50381a3f851452668adf5a6c61d39f5611e832"}, + {file = "pyiceberg-0.7.1-cp311-cp311-win_amd64.whl", hash = "sha256:57a0b1fb390d26a5b7155de011300300058343e5c2561f4839d69c1775df1d7e"}, + {file = "pyiceberg-0.7.1-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:84f2119705e31929aa75beb9a8ce97210e56f498e863e31dc499a2120c2842bd"}, + {file = "pyiceberg-0.7.1-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:f99ab8d71a2968af0b512fff1d3dcbd145705a95a26b05121c0df712683c9e0c"}, + {file = "pyiceberg-0.7.1-cp38-cp38-macosx_14_0_arm64.whl", hash = "sha256:5dc17aa1f53f5b8be12eae35dbcb9885b2534138bdecd31a0088680651fbb98e"}, + {file = "pyiceberg-0.7.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:917fdfd372922f9534fe9b6652881a79f82f94d7d3645ddb1925688e3d9aaf4d"}, + {file = "pyiceberg-0.7.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:910fab27c039d62a1fe4a199aaea63d08ada30ead6fd27d56bf038c487837691"}, + {file = "pyiceberg-0.7.1-cp38-cp38-win_amd64.whl", hash = "sha256:98db6d18dca335682c32b25406d7ab5afad8f1baea4fbdafda166cbc6557409c"}, + {file = "pyiceberg-0.7.1-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:c76ea44cc1b02c15b65e1b0cc81b5b3f813ba40a4e262416d7a1e84345f44cf1"}, + {file = "pyiceberg-0.7.1-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:57485e9331c7e8b1771ea1b2ecdc417dc7a13c7a9a538d74f3f00de98676958b"}, + {file = "pyiceberg-0.7.1-cp39-cp39-macosx_14_0_arm64.whl", hash = 
"sha256:bbc79698292482360be86f8d728237b78ef8eb416e21aea9d53e4a1b4f429ce7"}, + {file = "pyiceberg-0.7.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f84d912fc12866f22882f5de157cbbfab3dcbad8e0a4378557e5b84a0c3f360"}, + {file = "pyiceberg-0.7.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:f86c535735e57f1a0c76fd0f505e0b172cc212c96a3789f3845220695e792157"}, + {file = "pyiceberg-0.7.1-cp39-cp39-win_amd64.whl", hash = "sha256:d8bee5aa4b34e6028f0465cf405bc4e963e160ac52efbe4bdbc499bb55bc2780"}, + {file = "pyiceberg-0.7.1-pp310-pypy310_pp73-macosx_12_0_x86_64.whl", hash = "sha256:9ae56197db8570553491173adfd2e01a03ae116a1f9fa78ba5a1a1c4e2ad3dbf"}, + {file = "pyiceberg-0.7.1-pp310-pypy310_pp73-macosx_13_0_x86_64.whl", hash = "sha256:e28adc58500ca72e45a07ee4dcd90b63699a8875f178001bd12ace37294c5814"}, + {file = "pyiceberg-0.7.1-pp310-pypy310_pp73-macosx_14_0_arm64.whl", hash = "sha256:1ae47f2d0e87dccd158ae8dafc47125f9739858068fc3add8940f5585ea40ead"}, + {file = "pyiceberg-0.7.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb94c3e11354f85daafb2b2f3e13a245bcb35848135b5ed4e8c83e61393c36ea"}, + {file = "pyiceberg-0.7.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:4fe212b0594128d183711c6efb1a40ea5f17372e11595a84f4565eb9fe97c703"}, + {file = "pyiceberg-0.7.1-pp38-pypy38_pp73-macosx_12_0_x86_64.whl", hash = "sha256:35ce27243b86f7057fbd4594dbe5d6b2a1ccd738ba6b65c2a4f3af249f1e8364"}, + {file = "pyiceberg-0.7.1-pp38-pypy38_pp73-macosx_13_0_x86_64.whl", hash = "sha256:56e254623669ab03e779e4b696b7e36cd1c6973e8523200ccc232695742e269d"}, + {file = "pyiceberg-0.7.1-pp38-pypy38_pp73-macosx_14_0_arm64.whl", hash = "sha256:e07b59a5998c6d4cac258763c6c160234e1e3362a2097808bd02e05e0c16208a"}, + {file = "pyiceberg-0.7.1-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cde005aa075fc0e5ed0095438b0a4d39534e3cb84889b93d6aa265dd2e072eff"}, + {file = "pyiceberg-0.7.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:1950f2186f1c99e0d187ffee86e2f8d6bbbad9b0079573a7255b85ffaaa82e79"}, + {file = "pyiceberg-0.7.1-pp39-pypy39_pp73-macosx_12_0_x86_64.whl", hash = "sha256:273b4b642168a5e64fedc0073e18fd481b11d6891f9e44ceb5ce27126fe418f7"}, + {file = "pyiceberg-0.7.1-pp39-pypy39_pp73-macosx_13_0_x86_64.whl", hash = "sha256:9a2dbc621cdd4f0c92f5b2520f2b266b976317ff8a984aec2ce9240ee3d80471"}, + {file = "pyiceberg-0.7.1-pp39-pypy39_pp73-macosx_14_0_arm64.whl", hash = "sha256:34c2d6e9d027b66f8d531fcefeb5cda8b2a37e70170c01f6f1c977954d733c45"}, + {file = "pyiceberg-0.7.1-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3e97fb65862db191685355e1eb8d97d41d00679a3df1fbd7a1c2560b9e3e6d8"}, + {file = "pyiceberg-0.7.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:98a0de3c2f194907b07522769facbcacdff0ec9577f9710273ba7e0aa8465652"}, + {file = "pyiceberg-0.7.1.tar.gz", hash = "sha256:2fd8f9717b02673cb9cabe7aed82fc38933241b2bd15cbdc1ff7371e70317a47"}, +] + +[package.dependencies] +click = ">=7.1.1,<9.0.0" +fsspec = ">=2023.1.0,<2025.1.0" +mmh3 = ">=4.0.0,<5.0.0" +pydantic = ">=2.0,<2.4.0 || >2.4.0,<2.4.1 || >2.4.1,<3.0" +pyparsing = ">=3.1.0,<4.0.0" +requests = ">=2.20.0,<3.0.0" +rich = ">=10.11.0,<14.0.0" +sortedcontainers = "2.4.0" +strictyaml = ">=1.7.0,<2.0.0" +tenacity = ">=8.2.3,<9.0.0" +thrift = {version = ">=0.13.0,<1.0.0", optional = true, markers = "extra == \"hive\""} + +[package.extras] +adlfs = ["adlfs (>=2023.1.0,<2024.8.0)"] +daft = ["getdaft (>=0.2.12)"] +duckdb = ["duckdb (>=0.5.0,<2.0.0)", "numpy 
(>=1.22.4,<2.0.0)", "pyarrow (>=9.0.0,<18.0.0)"] +dynamodb = ["boto3 (>=1.24.59)"] +gcsfs = ["gcsfs (>=2023.1.0,<2024.1.0)"] +glue = ["boto3 (>=1.24.59)", "mypy-boto3-glue (>=1.28.18)"] +hive = ["thrift (>=0.13.0,<1.0.0)"] +pandas = ["numpy (>=1.22.4,<2.0.0)", "pandas (>=1.0.0,<3.0.0)", "pyarrow (>=9.0.0,<18.0.0)"] +pyarrow = ["numpy (>=1.22.4,<2.0.0)", "pyarrow (>=9.0.0,<18.0.0)"] +ray = ["numpy (>=1.22.4,<2.0.0)", "pandas (>=1.0.0,<3.0.0)", "pyarrow (>=9.0.0,<18.0.0)", "ray (==2.10.0)", "ray (>=2.10.0,<3.0.0)"] +s3fs = ["s3fs (>=2023.1.0,<2024.1.0)"] +snappy = ["python-snappy (>=0.6.0,<1.0.0)"] +sql-postgres = ["psycopg2-binary (>=2.9.6)", "sqlalchemy (>=2.0.18,<3.0.0)"] +sql-sqlite = ["sqlalchemy (>=2.0.18,<3.0.0)"] +zstandard = ["zstandard (>=0.13.0,<1.0.0)"] + [[package]] name = "pykube-ng" version = "23.6.0" @@ -5620,23 +5792,23 @@ files = [ [[package]] name = "setuptools" -version = "75.2.0" +version = "75.3.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-75.2.0-py3-none-any.whl", hash = "sha256:a7fcb66f68b4d9e8e66b42f9876150a3371558f98fa32222ffaa5bced76406f8"}, - {file = "setuptools-75.2.0.tar.gz", hash = "sha256:753bb6ebf1f465a1912e19ed1d41f403a79173a9acf66a42e7e6aec45c3c16ec"}, + {file = "setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd"}, + {file = "setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686"}, ] [package.extras] check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"] -core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] enabler = ["pytest-enabler (>=2.2)"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] -type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.11.*)", "pytest-mypy"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", 
"virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.12.*)", "pytest-mypy"] [[package]] name = "shandy-sqlfmt" @@ -5954,13 +6126,13 @@ sqlcipher = ["sqlcipher3_binary"] [[package]] name = "sqlfluff" -version = "3.2.4" +version = "3.2.5" description = "The SQL Linter for Humans" optional = false python-versions = ">=3.8" files = [ - {file = "sqlfluff-3.2.4-py3-none-any.whl", hash = "sha256:a03a152e01824b2b636235fb2703270367bd6695d1644345a29f648a44f6319b"}, - {file = "sqlfluff-3.2.4.tar.gz", hash = "sha256:a027ded8bea1f10a4de6173e3f02363cba37ab9e344432292553549a24028931"}, + {file = "sqlfluff-3.2.5-py3-none-any.whl", hash = "sha256:ae9ff821986b5b0dd1ea858392db7f0eb80343c2cdeee7900fa031f581e04643"}, + {file = "sqlfluff-3.2.5.tar.gz", hash = "sha256:39822db2c6ad7dac9f6e43d36a3d086c503c051b09665d14a5bdf644770f6ef6"}, ] [package.dependencies] @@ -5979,29 +6151,29 @@ tqdm = "*" [[package]] name = "sqlfluff-templater-dbt" -version = "3.2.4" +version = "3.2.5" description = "Lint your dbt project SQL" optional = false python-versions = "*" files = [ - {file = "sqlfluff_templater_dbt-3.2.4-py3-none-any.whl", hash = "sha256:9ff32f5f8267104966650597ea6375392cc00fa5cb3ccfaf69c0974758e22abe"}, - {file = "sqlfluff_templater_dbt-3.2.4.tar.gz", hash = "sha256:c317f3d86d4f094679004cef76176a2c5cc8bd7f71e7ce6b9e6ab0cd8d2df21c"}, + {file = "sqlfluff_templater_dbt-3.2.5-py3-none-any.whl", hash = "sha256:512d9f5b73d3d362c1983baf8c716c12a469604420c6dcc135e570a647490378"}, + {file = "sqlfluff_templater_dbt-3.2.5.tar.gz", hash = "sha256:5b5bc4612d707e514c6892367c61ae45f61dac4818de7e2dd037089e6b131dab"}, ] [package.dependencies] dbt-core = ">=1.4.1" jinja2-simple-tags = ">=0.3.1" -sqlfluff = "3.2.4" +sqlfluff = "3.2.5" [[package]] name = "sqlglot" -version = "25.24.5" +version = "25.27.0" description = "An easily customizable SQL parser and transpiler" optional = false python-versions = ">=3.7" files = [ - {file = "sqlglot-25.24.5-py3-none-any.whl", hash = "sha256:f8a8870d1f5cdd2e2dc5c39a5030a0c7b0a91264fb8972caead3dac8e8438873"}, - {file = "sqlglot-25.24.5.tar.gz", hash = "sha256:6d3d604034301ca3b614d6b4148646b4033317b7a93d1801e9661495eb4b4fcf"}, + {file = "sqlglot-25.27.0-py3-none-any.whl", hash = "sha256:6dbed60971542f1a69775f1f642359097859df4db440d4601cb75cb4cd9fe936"}, + {file = "sqlglot-25.27.0.tar.gz", hash = "sha256:0a225bd0967bb1830a4fa9e9072eb1a4e74e9f80b5ff99f4bba52a60330322e9"}, ] [package.dependencies] @@ -6083,13 +6255,13 @@ files = [ [[package]] name = "sqlmesh" -version = "0.125.6" +version = "0.129.0" description = "" optional = false python-versions = "*" files = [ - {file = "sqlmesh-0.125.6-py3-none-any.whl", hash = "sha256:05d93501f841d94405a27af999317da686bc4998730e8e0c3993d4a90e2b5136"}, - {file = "sqlmesh-0.125.6.tar.gz", hash = "sha256:fcad969dbe654566bdcb0406a00878b0d8a45ac9264c0830f69456f98b7e896d"}, + {file = "sqlmesh-0.129.0-py3-none-any.whl", hash = "sha256:fea4ee3a6bc1939b4b2e4239048f2fc71743674f739b6633f740a7547cbcd5de"}, + {file = "sqlmesh-0.129.0.tar.gz", hash = "sha256:0d04147be13591c51fac4c2fb51b9885a1a523e0a75e1ee4415076f746444c77"}, ] [package.dependencies] @@ -6107,7 +6279,8 @@ pydantic = "*" requests = "*" rich = {version = "*", extras = ["jupyter"]} "ruamel.yaml" = "*" -sqlglot = {version = ">=25.24.3,<25.25.0", extras = ["rs"]} +sqlglot = {version = ">=25.27.0,<25.28.0", extras = ["rs"]} +tenacity = "*" trino = {version = "*", optional = true, markers = "extra == \"trino\""} [package.extras] @@ -6117,7 
+6290,8 @@ cicdtest = ["dbt-athena-community", "dbt-databricks", "dbt-redshift", "dbt-sqlse clickhouse = ["clickhouse-connect"] databricks = ["databricks-sql-connector"] dbt = ["dbt-core (<2)"] -dev = ["Faker", "PyGithub", "agate (==1.7.1)", "apache-airflow (==2.9.1)", "beautifulsoup4", "cryptography (>=42.0.4,<42.1.0)", "custom-materializations", "dbt-bigquery", "dbt-core", "dbt-duckdb (>=1.7.1)", "dbt-snowflake", "google-auth", "google-cloud-bigquery", "google-cloud-bigquery-storage", "mypy (>=1.10.0,<1.11.0)", "pandas-stubs", "pre-commit", "psycopg2-binary", "pydantic (<2.6.0)", "pyspark (>=3.5.0,<3.6.0)", "pytest", "pytest-asyncio (<0.23.0)", "pytest-mock", "pytest-retry", "pytest-xdist", "pytz", "ruff (>=0.4.0,<0.5.0)", "snowflake-connector-python[pandas,secure-local-storage] (>=3.0.2)", "sqlalchemy-stubs", "tenacity", "types-croniter", "types-dateparser", "types-python-dateutil", "types-pytz", "types-requests (==2.28.8)", "typing-extensions"] +dev = ["Faker", "PyGithub", "agate (==1.7.1)", "apache-airflow (==2.9.1)", "beautifulsoup4", "cryptography (>=42.0.4,<42.1.0)", "custom-materializations", "dbt-bigquery", "dbt-core", "dbt-duckdb (>=1.7.1)", "dbt-snowflake", "google-auth", "google-cloud-bigquery", "google-cloud-bigquery-storage", "mypy (>=1.10.0,<1.11.0)", "pandas-stubs", "pre-commit", "psycopg2-binary", "pydantic (<2.6.0)", "pyspark (>=3.5.0,<3.6.0)", "pytest", "pytest-asyncio (<0.23.0)", "pytest-mock", "pytest-retry", "pytest-xdist", "pytz", "ruff (>=0.4.0,<0.5.0)", "snowflake-connector-python[pandas,secure-local-storage] (>=3.0.2)", "sqlalchemy-stubs", "types-croniter", "types-dateparser", "types-python-dateutil", "types-pytz", "types-requests (==2.28.8)", "typing-extensions"] +dlt = ["dlt"] gcppostgres = ["cloud-sql-python-connector[pg8000]"] github = ["PyGithub"] llm = ["langchain", "openai"] @@ -6167,13 +6341,13 @@ tests = ["cython", "littleutils", "pygments", "pytest", "typeguard"] [[package]] name = "starlette" -version = "0.41.0" +version = "0.41.2" description = "The little ASGI library that shines." 
optional = false python-versions = ">=3.8" files = [ - {file = "starlette-0.41.0-py3-none-any.whl", hash = "sha256:a0193a3c413ebc9c78bff1c3546a45bb8c8bcb4a84cae8747d650a65bd37210a"}, - {file = "starlette-0.41.0.tar.gz", hash = "sha256:39cbd8768b107d68bfe1ff1672b38a2c38b49777de46d2a592841d58e3bf7c2a"}, + {file = "starlette-0.41.2-py3-none-any.whl", hash = "sha256:fbc189474b4731cf30fcef52f18a8d070e3f3b46c6a04c97579e85e6ffca942d"}, + {file = "starlette-0.41.2.tar.gz", hash = "sha256:9834fd799d1a87fd346deb76158668cfa0b0d56f85caefe8268e2d97c3468b62"}, ] [package.dependencies] @@ -6182,6 +6356,20 @@ anyio = ">=3.4.0,<5" [package.extras] full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7)", "pyyaml"] +[[package]] +name = "strictyaml" +version = "1.7.3" +description = "Strict, typed YAML parser" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "strictyaml-1.7.3-py3-none-any.whl", hash = "sha256:fb5c8a4edb43bebb765959e420f9b3978d7f1af88c80606c03fb420888f5d1c7"}, + {file = "strictyaml-1.7.3.tar.gz", hash = "sha256:22f854a5fcab42b5ddba8030a0e4be51ca89af0267961c8d6cfa86395586c407"}, +] + +[package.dependencies] +python-dateutil = ">=2.6.0" + [[package]] name = "structlog" version = "24.4.0" @@ -6226,13 +6414,13 @@ files = [ [[package]] name = "tenacity" -version = "9.0.0" +version = "8.5.0" description = "Retry code until it succeeds" optional = false python-versions = ">=3.8" files = [ - {file = "tenacity-9.0.0-py3-none-any.whl", hash = "sha256:93de0c98785b27fcf659856aa9f54bfbd399e29969b0621bc7f762bd441b4539"}, - {file = "tenacity-9.0.0.tar.gz", hash = "sha256:807f37ca97d62aa361264d497b0e31e92b8027044942bfa756160d908320d73b"}, + {file = "tenacity-8.5.0-py3-none-any.whl", hash = "sha256:b594c2a5945830c267ce6b79a166228323ed52718f30302c1359836112346687"}, + {file = "tenacity-8.5.0.tar.gz", hash = "sha256:8bc6c0c8a09b31e6cad13c47afbed1a567518250a9a171418582ed8d9c20ca78"}, ] [package.extras] @@ -6269,6 +6457,24 @@ typing-extensions = ">=4.4.0,<5.0.0" [package.extras] syntax = ["tree-sitter (>=0.20.1,<0.21.0)", "tree_sitter_languages (>=1.7.0)"] +[[package]] +name = "thrift" +version = "0.21.0" +description = "Python bindings for the Apache Thrift RPC system" +optional = false +python-versions = "*" +files = [ + {file = "thrift-0.21.0.tar.gz", hash = "sha256:5e6f7c50f936ebfa23e924229afc95eb219f8c8e5a83202dd4a391244803e402"}, +] + +[package.dependencies] +six = ">=1.7.2" + +[package.extras] +all = ["tornado (>=4.0)", "twisted"] +tornado = ["tornado (>=4.0)"] +twisted = ["twisted"] + [[package]] name = "tomli" version = "2.0.2" @@ -6335,13 +6541,13 @@ files = [ [[package]] name = "tqdm" -version = "4.66.5" +version = "4.66.6" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" files = [ - {file = "tqdm-4.66.5-py3-none-any.whl", hash = "sha256:90279a3770753eafc9194a0364852159802111925aa30eb3f9d85b0e805ac7cd"}, - {file = "tqdm-4.66.5.tar.gz", hash = "sha256:e1020aef2e5096702d8a025ac7d16b1577279c9d63f8375b63083e9a5f0fcbad"}, + {file = "tqdm-4.66.6-py3-none-any.whl", hash = "sha256:223e8b5359c2efc4b30555531f09e9f2f3589bcd7fdd389271191031b49b7a63"}, + {file = "tqdm-4.66.6.tar.gz", hash = "sha256:4bdd694238bef1485ce839d67967ab50af8f9272aab687c0d7702a01da0be090"}, ] [package.dependencies] @@ -6423,13 +6629,13 @@ files = [ [[package]] name = "types-setuptools" -version = "75.2.0.20241019" +version = "75.2.0.20241025" description = "Typing stubs for setuptools" optional = false python-versions = ">=3.8" files = [ - {file = 
"types-setuptools-75.2.0.20241019.tar.gz", hash = "sha256:86ea31b5f6df2c6b8f2dc8ae3f72b213607f62549b6fa2ed5866e5299f968694"}, - {file = "types_setuptools-75.2.0.20241019-py3-none-any.whl", hash = "sha256:2e48ff3acd4919471e80d5e3f049cce5c177e108d5d36d2d4cee3fa4d4104258"}, + {file = "types-setuptools-75.2.0.20241025.tar.gz", hash = "sha256:2949913a518d5285ce00a3b7d88961c80a6e72ffb8f3da0a3f5650ea533bd45e"}, + {file = "types_setuptools-75.2.0.20241025-py3-none-any.whl", hash = "sha256:6721ac0f1a620321e2ccd87a9a747c4a383dc381f78d894ce37f2455b45fcf1c"}, ] [[package]] @@ -6929,93 +7135,93 @@ h11 = ">=0.9.0,<1" [[package]] name = "yarl" -version = "1.16.0" +version = "1.17.0" description = "Yet another URL library" optional = false python-versions = ">=3.9" files = [ - {file = "yarl-1.16.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:32468f41242d72b87ab793a86d92f885355bcf35b3355aa650bfa846a5c60058"}, - {file = "yarl-1.16.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:234f3a3032b505b90e65b5bc6652c2329ea7ea8855d8de61e1642b74b4ee65d2"}, - {file = "yarl-1.16.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8a0296040e5cddf074c7f5af4a60f3fc42c0237440df7bcf5183be5f6c802ed5"}, - {file = "yarl-1.16.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:de6c14dd7c7c0badba48157474ea1f03ebee991530ba742d381b28d4f314d6f3"}, - {file = "yarl-1.16.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b140e532fe0266003c936d017c1ac301e72ee4a3fd51784574c05f53718a55d8"}, - {file = "yarl-1.16.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:019f5d58093402aa8f6661e60fd82a28746ad6d156f6c5336a70a39bd7b162b9"}, - {file = "yarl-1.16.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c42998fd1cbeb53cd985bff0e4bc25fbe55fd6eb3a545a724c1012d69d5ec84"}, - {file = "yarl-1.16.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7c7c30fb38c300fe8140df30a046a01769105e4cf4282567a29b5cdb635b66c4"}, - {file = "yarl-1.16.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:e49e0fd86c295e743fd5be69b8b0712f70a686bc79a16e5268386c2defacaade"}, - {file = "yarl-1.16.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:b9ca7b9147eb1365c8bab03c003baa1300599575effad765e0b07dd3501ea9af"}, - {file = "yarl-1.16.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:27e11db3f1e6a51081a981509f75617b09810529de508a181319193d320bc5c7"}, - {file = "yarl-1.16.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8994c42f4ca25df5380ddf59f315c518c81df6a68fed5bb0c159c6cb6b92f120"}, - {file = "yarl-1.16.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:542fa8e09a581bcdcbb30607c7224beff3fdfb598c798ccd28a8184ffc18b7eb"}, - {file = "yarl-1.16.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:2bd6a51010c7284d191b79d3b56e51a87d8e1c03b0902362945f15c3d50ed46b"}, - {file = "yarl-1.16.0-cp310-cp310-win32.whl", hash = "sha256:178ccb856e265174a79f59721031060f885aca428983e75c06f78aa24b91d929"}, - {file = "yarl-1.16.0-cp310-cp310-win_amd64.whl", hash = "sha256:fe8bba2545427418efc1929c5c42852bdb4143eb8d0a46b09de88d1fe99258e7"}, - {file = "yarl-1.16.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d8643975a0080f361639787415a038bfc32d29208a4bf6b783ab3075a20b1ef3"}, - {file = "yarl-1.16.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:676d96bafc8c2d0039cea0cd3fd44cee7aa88b8185551a2bb93354668e8315c2"}, - {file = "yarl-1.16.0-cp311-cp311-macosx_11_0_arm64.whl", 
hash = "sha256:d9525f03269e64310416dbe6c68d3b23e5d34aaa8f47193a1c45ac568cecbc49"}, - {file = "yarl-1.16.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8b37d5ec034e668b22cf0ce1074d6c21fd2a08b90d11b1b73139b750a8b0dd97"}, - {file = "yarl-1.16.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4f32c4cb7386b41936894685f6e093c8dfaf0960124d91fe0ec29fe439e201d0"}, - {file = "yarl-1.16.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5b8e265a0545637492a7e12fd7038370d66c9375a61d88c5567d0e044ded9202"}, - {file = "yarl-1.16.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:789a3423f28a5fff46fbd04e339863c169ece97c827b44de16e1a7a42bc915d2"}, - {file = "yarl-1.16.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f1d1f45e3e8d37c804dca99ab3cf4ab3ed2e7a62cd82542924b14c0a4f46d243"}, - {file = "yarl-1.16.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:621280719c4c5dad4c1391160a9b88925bb8b0ff6a7d5af3224643024871675f"}, - {file = "yarl-1.16.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:ed097b26f18a1f5ff05f661dc36528c5f6735ba4ce8c9645e83b064665131349"}, - {file = "yarl-1.16.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:2f1fe2b2e3ee418862f5ebc0c0083c97f6f6625781382f828f6d4e9b614eba9b"}, - {file = "yarl-1.16.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:87dd10bc0618991c66cee0cc65fa74a45f4ecb13bceec3c62d78ad2e42b27a16"}, - {file = "yarl-1.16.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:4199db024b58a8abb2cfcedac7b1292c3ad421684571aeb622a02f242280e8d6"}, - {file = "yarl-1.16.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:99a9dcd4b71dd5f5f949737ab3f356cfc058c709b4f49833aeffedc2652dac56"}, - {file = "yarl-1.16.0-cp311-cp311-win32.whl", hash = "sha256:a9394c65ae0ed95679717d391c862dece9afacd8fa311683fc8b4362ce8a410c"}, - {file = "yarl-1.16.0-cp311-cp311-win_amd64.whl", hash = "sha256:5b9101f528ae0f8f65ac9d64dda2bb0627de8a50344b2f582779f32fda747c1d"}, - {file = "yarl-1.16.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:4ffb7c129707dd76ced0a4a4128ff452cecf0b0e929f2668ea05a371d9e5c104"}, - {file = "yarl-1.16.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:1a5e9d8ce1185723419c487758d81ac2bde693711947032cce600ca7c9cda7d6"}, - {file = "yarl-1.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:d743e3118b2640cef7768ea955378c3536482d95550222f908f392167fe62059"}, - {file = "yarl-1.16.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:26768342f256e6e3c37533bf9433f5f15f3e59e3c14b2409098291b3efaceacb"}, - {file = "yarl-1.16.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d1b0796168b953bca6600c5f97f5ed407479889a36ad7d17183366260f29a6b9"}, - {file = "yarl-1.16.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:858728086914f3a407aa7979cab743bbda1fe2bdf39ffcd991469a370dd7414d"}, - {file = "yarl-1.16.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5570e6d47bcb03215baf4c9ad7bf7c013e56285d9d35013541f9ac2b372593e7"}, - {file = "yarl-1.16.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:66ea8311422a7ba1fc79b4c42c2baa10566469fe5a78500d4e7754d6e6db8724"}, - {file = "yarl-1.16.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:649bddcedee692ee8a9b7b6e38582cb4062dc4253de9711568e5620d8707c2a3"}, - {file = 
"yarl-1.16.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:3a91654adb7643cb21b46f04244c5a315a440dcad63213033826549fa2435f71"}, - {file = "yarl-1.16.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:b439cae82034ade094526a8f692b9a2b5ee936452de5e4c5f0f6c48df23f8604"}, - {file = "yarl-1.16.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:571f781ae8ac463ce30bacebfaef2c6581543776d5970b2372fbe31d7bf31a07"}, - {file = "yarl-1.16.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:aa7943f04f36d6cafc0cf53ea89824ac2c37acbdb4b316a654176ab8ffd0f968"}, - {file = "yarl-1.16.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1a5cf32539373ff39d97723e39a9283a7277cbf1224f7aef0c56c9598b6486c3"}, - {file = "yarl-1.16.0-cp312-cp312-win32.whl", hash = "sha256:a5b6c09b9b4253d6a208b0f4a2f9206e511ec68dce9198e0fbec4f160137aa67"}, - {file = "yarl-1.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:1208ca14eed2fda324042adf8d6c0adf4a31522fa95e0929027cd487875f0240"}, - {file = "yarl-1.16.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a5ace0177520bd4caa99295a9b6fb831d0e9a57d8e0501a22ffaa61b4c024283"}, - {file = "yarl-1.16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:7118bdb5e3ed81acaa2095cba7ec02a0fe74b52a16ab9f9ac8e28e53ee299732"}, - {file = "yarl-1.16.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:38fec8a2a94c58bd47c9a50a45d321ab2285ad133adefbbadf3012c054b7e656"}, - {file = "yarl-1.16.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8791d66d81ee45866a7bb15a517b01a2bcf583a18ebf5d72a84e6064c417e64b"}, - {file = "yarl-1.16.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1cf936ba67bc6c734f3aa1c01391da74ab7fc046a9f8bbfa230b8393b90cf472"}, - {file = "yarl-1.16.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d1aab176dd55b59f77a63b27cffaca67d29987d91a5b615cbead41331e6b7428"}, - {file = "yarl-1.16.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:995d0759004c08abd5d1b81300a91d18c8577c6389300bed1c7c11675105a44d"}, - {file = "yarl-1.16.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1bc22e00edeb068f71967ab99081e9406cd56dbed864fc3a8259442999d71552"}, - {file = "yarl-1.16.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:35b4f7842154176523e0a63c9b871168c69b98065d05a4f637fce342a6a2693a"}, - {file = "yarl-1.16.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:7ace71c4b7a0c41f317ae24be62bb61e9d80838d38acb20e70697c625e71f120"}, - {file = "yarl-1.16.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:8f639e3f5795a6568aa4f7d2ac6057c757dcd187593679f035adbf12b892bb00"}, - {file = "yarl-1.16.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:e8be3aff14f0120ad049121322b107f8a759be76a6a62138322d4c8a337a9e2c"}, - {file = "yarl-1.16.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:122d8e7986043d0549e9eb23c7fd23be078be4b70c9eb42a20052b3d3149c6f2"}, - {file = "yarl-1.16.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:0fd9c227990f609c165f56b46107d0bc34553fe0387818c42c02f77974402c36"}, - {file = "yarl-1.16.0-cp313-cp313-win32.whl", hash = "sha256:595ca5e943baed31d56b33b34736461a371c6ea0038d3baec399949dd628560b"}, - {file = "yarl-1.16.0-cp313-cp313-win_amd64.whl", hash = "sha256:921b81b8d78f0e60242fb3db615ea3f368827a76af095d5a69f1c3366db3f596"}, - {file = "yarl-1.16.0-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:ab2b2ac232110a1fdb0d3ffcd087783edd3d4a6ced432a1bf75caf7b7be70916"}, - {file = "yarl-1.16.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:7f8713717a09acbfee7c47bfc5777e685539fefdd34fa72faf504c8be2f3df4e"}, - {file = "yarl-1.16.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cdcffe1dbcb4477d2b4202f63cd972d5baa155ff5a3d9e35801c46a415b7f71a"}, - {file = "yarl-1.16.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9a91217208306d82357c67daeef5162a41a28c8352dab7e16daa82e3718852a7"}, - {file = "yarl-1.16.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3ab3ed42c78275477ea8e917491365e9a9b69bb615cb46169020bd0aa5e2d6d3"}, - {file = "yarl-1.16.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:707ae579ccb3262dfaef093e202b4c3fb23c3810e8df544b1111bd2401fd7b09"}, - {file = "yarl-1.16.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ad7a852d1cd0b8d8b37fc9d7f8581152add917a98cfe2ea6e241878795f917ae"}, - {file = "yarl-1.16.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d3f1cc3d3d4dc574bebc9b387f6875e228ace5748a7c24f49d8f01ac1bc6c31b"}, - {file = "yarl-1.16.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:5ff96da263740779b0893d02b718293cc03400c3a208fc8d8cd79d9b0993e532"}, - {file = "yarl-1.16.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:3d375a19ba2bfe320b6d873f3fb165313b002cef8b7cc0a368ad8b8a57453837"}, - {file = "yarl-1.16.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:62c7da0ad93a07da048b500514ca47b759459ec41924143e2ddb5d7e20fd3db5"}, - {file = "yarl-1.16.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = "sha256:147b0fcd0ee33b4b5f6edfea80452d80e419e51b9a3f7a96ce98eaee145c1581"}, - {file = "yarl-1.16.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:504e1fe1cc4f170195320eb033d2b0ccf5c6114ce5bf2f617535c01699479bca"}, - {file = "yarl-1.16.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:bdcf667a5dec12a48f669e485d70c54189f0639c2157b538a4cffd24a853624f"}, - {file = "yarl-1.16.0-cp39-cp39-win32.whl", hash = "sha256:e9951afe6557c75a71045148890052cb942689ee4c9ec29f5436240e1fcc73b7"}, - {file = "yarl-1.16.0-cp39-cp39-win_amd64.whl", hash = "sha256:7d7aaa8ff95d0840e289423e7dc35696c2b058d635f945bf05b5cd633146b027"}, - {file = "yarl-1.16.0-py3-none-any.whl", hash = "sha256:e6980a558d8461230c457218bd6c92dfc1d10205548215c2c21d79dc8d0a96f3"}, - {file = "yarl-1.16.0.tar.gz", hash = "sha256:b6f687ced5510a9a2474bbae96a4352e5ace5fa34dc44a217b0537fec1db00b4"}, + {file = "yarl-1.17.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2d8715edfe12eee6f27f32a3655f38d6c7410deb482158c0b7d4b7fad5d07628"}, + {file = "yarl-1.17.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1803bf2a7a782e02db746d8bd18f2384801bc1d108723840b25e065b116ad726"}, + {file = "yarl-1.17.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e66589110e20c2951221a938fa200c7aa134a8bdf4e4dc97e6b21539ff026d4"}, + {file = "yarl-1.17.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7069d411cfccf868e812497e0ec4acb7c7bf8d684e93caa6c872f1e6f5d1664d"}, + {file = "yarl-1.17.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cbf70ba16118db3e4b0da69dcde9d4d4095d383c32a15530564c283fa38a7c52"}, + {file = "yarl-1.17.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0bc53cc349675b32ead83339a8de79eaf13b88f2669c09d4962322bb0f064cbc"}, + {file = 
"yarl-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d6aa18a402d1c80193ce97c8729871f17fd3e822037fbd7d9b719864018df746"}, + {file = "yarl-1.17.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d89c5bc701861cfab357aa0cd039bc905fe919997b8c312b4b0c358619c38d4d"}, + {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b728bdf38ca58f2da1d583e4af4ba7d4cd1a58b31a363a3137a8159395e7ecc7"}, + {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:5542e57dc15d5473da5a39fbde14684b0cc4301412ee53cbab677925e8497c11"}, + {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e564b57e5009fb150cb513804d7e9e9912fee2e48835638f4f47977f88b4a39c"}, + {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:eb3c4cff524b4c1c1dba3a6da905edb1dfd2baf6f55f18a58914bbb2d26b59e1"}, + {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:05e13f389038842da930d439fbed63bdce3f7644902714cb68cf527c971af804"}, + {file = "yarl-1.17.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:153c38ee2b4abba136385af4467459c62d50f2a3f4bde38c7b99d43a20c143ef"}, + {file = "yarl-1.17.0-cp310-cp310-win32.whl", hash = "sha256:4065b4259d1ae6f70fd9708ffd61e1c9c27516f5b4fae273c41028afcbe3a094"}, + {file = "yarl-1.17.0-cp310-cp310-win_amd64.whl", hash = "sha256:abf366391a02a8335c5c26163b5fe6f514cc1d79e74d8bf3ffab13572282368e"}, + {file = "yarl-1.17.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:19a4fe0279626c6295c5b0c8c2bb7228319d2e985883621a6e87b344062d8135"}, + {file = "yarl-1.17.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cadd0113f4db3c6b56868d6a19ca6286f5ccfa7bc08c27982cf92e5ed31b489a"}, + {file = "yarl-1.17.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:60d6693eef43215b1ccfb1df3f6eae8db30a9ff1e7989fb6b2a6f0b468930ee8"}, + {file = "yarl-1.17.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5bb8bf3843e1fa8cf3fe77813c512818e57368afab7ebe9ef02446fe1a10b492"}, + {file = "yarl-1.17.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d2a5b35fd1d8d90443e061d0c8669ac7600eec5c14c4a51f619e9e105b136715"}, + {file = "yarl-1.17.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c5bf17b32f392df20ab5c3a69d37b26d10efaa018b4f4e5643c7520d8eee7ac7"}, + {file = "yarl-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:48f51b529b958cd06e78158ff297a8bf57b4021243c179ee03695b5dbf9cb6e1"}, + {file = "yarl-1.17.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5fcaa06bf788e19f913d315d9c99a69e196a40277dc2c23741a1d08c93f4d430"}, + {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:32f3ee19ff0f18a7a522d44e869e1ebc8218ad3ae4ebb7020445f59b4bbe5897"}, + {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_armv7l.whl", hash = "sha256:a4fb69a81ae2ec2b609574ae35420cf5647d227e4d0475c16aa861dd24e840b0"}, + {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:7bacc8b77670322132a1b2522c50a1f62991e2f95591977455fd9a398b4e678d"}, + {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:437bf6eb47a2d20baaf7f6739895cb049e56896a5ffdea61a4b25da781966e8b"}, + {file = "yarl-1.17.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:30534a03c87484092080e3b6e789140bd277e40f453358900ad1f0f2e61fc8ec"}, + {file = 
"yarl-1.17.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b30df4ff98703649915144be6f0df3b16fd4870ac38a09c56d5d9e54ff2d5f96"}, + {file = "yarl-1.17.0-cp311-cp311-win32.whl", hash = "sha256:263b487246858e874ab53e148e2a9a0de8465341b607678106829a81d81418c6"}, + {file = "yarl-1.17.0-cp311-cp311-win_amd64.whl", hash = "sha256:07055a9e8b647a362e7d4810fe99d8f98421575e7d2eede32e008c89a65a17bd"}, + {file = "yarl-1.17.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:84095ab25ba69a8fa3fb4936e14df631b8a71193fe18bd38be7ecbe34d0f5512"}, + {file = "yarl-1.17.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:02608fb3f6df87039212fc746017455ccc2a5fc96555ee247c45d1e9f21f1d7b"}, + {file = "yarl-1.17.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:13468d291fe8c12162b7cf2cdb406fe85881c53c9e03053ecb8c5d3523822cd9"}, + {file = "yarl-1.17.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8da3f8f368fb7e2f052fded06d5672260c50b5472c956a5f1bd7bf474ae504ab"}, + {file = "yarl-1.17.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ec0507ab6523980bed050137007c76883d941b519aca0e26d4c1ec1f297dd646"}, + {file = "yarl-1.17.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08fc76df7fd8360e9ff30e6ccc3ee85b8dbd6ed5d3a295e6ec62bcae7601b932"}, + {file = "yarl-1.17.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d522f390686acb6bab2b917dd9ca06740c5080cd2eaa5aef8827b97e967319d"}, + {file = "yarl-1.17.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:147c527a80bb45b3dcd6e63401af8ac574125d8d120e6afe9901049286ff64ef"}, + {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:24cf43bcd17a0a1f72284e47774f9c60e0bf0d2484d5851f4ddf24ded49f33c6"}, + {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:c28a44b9e0fba49c3857360e7ad1473fc18bc7f6659ca08ed4f4f2b9a52c75fa"}, + {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:350cacb2d589bc07d230eb995d88fcc646caad50a71ed2d86df533a465a4e6e1"}, + {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:fd1ab1373274dea1c6448aee420d7b38af163b5c4732057cd7ee9f5454efc8b1"}, + {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:4934e0f96dadc567edc76d9c08181633c89c908ab5a3b8f698560124167d9488"}, + {file = "yarl-1.17.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8d0a278170d75c88e435a1ce76557af6758bfebc338435b2eba959df2552163e"}, + {file = "yarl-1.17.0-cp312-cp312-win32.whl", hash = "sha256:61584f33196575a08785bb56db6b453682c88f009cd9c6f338a10f6737ce419f"}, + {file = "yarl-1.17.0-cp312-cp312-win_amd64.whl", hash = "sha256:9987a439ad33a7712bd5bbd073f09ad10d38640425fa498ecc99d8aa064f8fc4"}, + {file = "yarl-1.17.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:8deda7b8eb15a52db94c2014acdc7bdd14cb59ec4b82ac65d2ad16dc234a109e"}, + {file = "yarl-1.17.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56294218b348dcbd3d7fce0ffd79dd0b6c356cb2a813a1181af730b7c40de9e7"}, + {file = "yarl-1.17.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1fab91292f51c884b290ebec0b309a64a5318860ccda0c4940e740425a67b6b7"}, + {file = "yarl-1.17.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cf93fa61ff4d9c7d40482ce1a2c9916ca435e34a1b8451e17f295781ccc034f"}, + {file = "yarl-1.17.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:261be774a0d71908c8830c33bacc89eef15c198433a8cc73767c10eeeb35a7d0"}, + {file = "yarl-1.17.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:deec9693b67f6af856a733b8a3e465553ef09e5e8ead792f52c25b699b8f9e6e"}, + {file = "yarl-1.17.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c804b07622ba50a765ca7fb8145512836ab65956de01307541def869e4a456c9"}, + {file = "yarl-1.17.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1d013a7c9574e98c14831a8f22d27277688ec3b2741d0188ac01a910b009987a"}, + {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e2cfcba719bd494c7413dcf0caafb51772dec168c7c946e094f710d6aa70494e"}, + {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:c068aba9fc5b94dfae8ea1cedcbf3041cd4c64644021362ffb750f79837e881f"}, + {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:3616df510ffac0df3c9fa851a40b76087c6c89cbcea2de33a835fc80f9faac24"}, + {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:755d6176b442fba9928a4df787591a6a3d62d4969f05c406cad83d296c5d4e05"}, + {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:c18f6e708d1cf9ff5b1af026e697ac73bea9cb70ee26a2b045b112548579bed2"}, + {file = "yarl-1.17.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5b937c216b6dee8b858c6afea958de03c5ff28406257d22b55c24962a2baf6fd"}, + {file = "yarl-1.17.0-cp313-cp313-win32.whl", hash = "sha256:d0131b14cb545c1a7bd98f4565a3e9bdf25a1bd65c83fc156ee5d8a8499ec4a3"}, + {file = "yarl-1.17.0-cp313-cp313-win_amd64.whl", hash = "sha256:01c96efa4313c01329e88b7e9e9e1b2fc671580270ddefdd41129fa8d0db7696"}, + {file = "yarl-1.17.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:0d44f67e193f0a7acdf552ecb4d1956a3a276c68e7952471add9f93093d1c30d"}, + {file = "yarl-1.17.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:16ea0aa5f890cdcb7ae700dffa0397ed6c280840f637cd07bffcbe4b8d68b985"}, + {file = "yarl-1.17.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cf5469dc7dcfa65edf5cc3a6add9f84c5529c6b556729b098e81a09a92e60e51"}, + {file = "yarl-1.17.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e662bf2f6e90b73cf2095f844e2bc1fda39826472a2aa1959258c3f2a8500a2f"}, + {file = "yarl-1.17.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8260e88f1446904ba20b558fa8ce5d0ab9102747238e82343e46d056d7304d7e"}, + {file = "yarl-1.17.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5dc16477a4a2c71e64c5d3d15d7ae3d3a6bb1e8b955288a9f73c60d2a391282f"}, + {file = "yarl-1.17.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46027e326cecd55e5950184ec9d86c803f4f6fe4ba6af9944a0e537d643cdbe0"}, + {file = "yarl-1.17.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fc95e46c92a2b6f22e70afe07e34dbc03a4acd07d820204a6938798b16f4014f"}, + {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:16ca76c7ac9515320cd09d6cc083d8d13d1803f6ebe212b06ea2505fd66ecff8"}, + {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_armv7l.whl", hash = "sha256:eb1a5b97388f2613f9305d78a3473cdf8d80c7034e554d8199d96dcf80c62ac4"}, + {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:41fd5498975418cdc34944060b8fbeec0d48b2741068077222564bea68daf5a6"}, + {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_ppc64le.whl", hash = 
"sha256:146ca582ed04a5664ad04b0e0603934281eaab5c0115a5a46cce0b3c061a56a1"}, + {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_s390x.whl", hash = "sha256:6abb8c06107dbec97481b2392dafc41aac091a5d162edf6ed7d624fe7da0587a"}, + {file = "yarl-1.17.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:4d14be4613dd4f96c25feb4bd8c0d8ce0f529ab0ae555a17df5789e69d8ec0c5"}, + {file = "yarl-1.17.0-cp39-cp39-win32.whl", hash = "sha256:174d6a6cad1068f7850702aad0c7b1bca03bcac199ca6026f84531335dfc2646"}, + {file = "yarl-1.17.0-cp39-cp39-win_amd64.whl", hash = "sha256:6af417ca2c7349b101d3fd557ad96b4cd439fdb6ab0d288e3f64a068eea394d0"}, + {file = "yarl-1.17.0-py3-none-any.whl", hash = "sha256:62dd42bb0e49423f4dd58836a04fcf09c80237836796025211bbe913f1524993"}, + {file = "yarl-1.17.0.tar.gz", hash = "sha256:d3f13583f378930377e02002b4085a3d025b00402d5a80911726d43a67911cd9"}, ] [package.dependencies] @@ -7168,4 +7374,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = "^3.12,<3.13" -content-hash = "d837ca847d33d9202dc4569d9ab3a8ffa6d187111bc5752ce7342ce130a69a79" +content-hash = "5bad86addcaf19f8bea807e45b8fac3bc3e1480ddd67d830333e797279f82c54" diff --git a/pyproject.toml b/pyproject.toml index 999c69c4f..a1a3636ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ sqlalchemy = "^2.0.25" textual = "^0.52.1" redis = "^5.0.7" githubkit = "^0.11.6" -sqlmesh = { extras = ["trino"], version = "^0.125.0" } +sqlmesh = {extras = ["trino"], version = "^0.129.0"} dagster-duckdb = "^0.24.0" dagster-duckdb-polars = "^0.24.0" google-cloud-bigquery-storage = "^2.25.0" @@ -62,6 +62,7 @@ dagster-sqlmesh = "0.2.0.dev3" google-auth = "^2.34.0" pillow = "^10.4.0" dagster-k8s = "^0.24.6" +pyiceberg = { extras = ["hive"], version = "^0.7.1" } [tool.poetry.scripts] diff --git a/warehouse/metrics_mesh/models/metrics_factories.py b/warehouse/metrics_mesh/models/metrics_factories.py index 784237899..2c045606e 100644 --- a/warehouse/metrics_mesh/models/metrics_factories.py +++ b/warehouse/metrics_mesh/models/metrics_factories.py @@ -1,49 +1,9 @@ -from metrics_tools.lib.factories import ( - MetricQueryDef, +from metrics_tools.factory import ( timeseries_metrics, + MetricQueryDef, RollingConfig, ) - -# daily_timeseries_rolling_window_model( -# model_name="metrics.timeseries_metrics_by_artifact_over_30_days", -# metric_queries={ -# "developer_active_days": MetricQueryDef( -# ref="active_days.sql", -# vars={ -# "activity_event_types": ["COMMIT_CODE"], -# }, -# ), -# "developer_classifications": MetricQueryDef( -# ref="developer_activity_classification.sql", -# vars={"full_time_days": 10}, -# ), -# "contributor_active_days": MetricQueryDef( -# ref="active_days.sql", -# vars={ -# "activity_event_types": [ -# "COMMIT_CODE", -# "ISSUE_OPENED", -# "PULL_REQUEST_OPENED", -# ], -# }, -# ), -# "contributor_classifications": MetricQueryDef( -# ref="contributor_activity_classification.sql", -# vars={"full_time_days": 10}, -# ), -# "stars": MetricQueryDef( -# ref="stars.sql", -# vars={}, -# ), -# }, -# trailing_days=30, -# model_options=dict( -# start="2015-01-01", -# cron="@daily", -# ), -# ) - timeseries_metrics( start="2015-01-01", model_prefix="timeseries", @@ -115,11 +75,25 @@ ref="gas_fees.sql", time_aggregations=["daily", "weekly", "monthly"], ), - "change_in_developers": MetricQueryDef( + "change_in_30_developer_activity": MetricQueryDef( + vars={ + "comparison_interval": 30, + }, ref="change_in_developers.sql", rolling=RollingConfig( - windows=[30, 60, 90], - unit="day", + windows=[2], + unit="period", + 
cron="@daily", + ), + ), + "change_in_60_developer_activity": MetricQueryDef( + vars={ + "comparison_interval": 60, + }, + ref="change_in_developers.sql", + rolling=RollingConfig( + windows=[2], + unit="period", cron="@daily", ), ), diff --git a/warehouse/metrics_mesh/oso_metrics/active_addresses.sql b/warehouse/metrics_mesh/oso_metrics/active_addresses.sql index 950d1f060..27460383d 100644 --- a/warehouse/metrics_mesh/oso_metrics/active_addresses.sql +++ b/warehouse/metrics_mesh/oso_metrics/active_addresses.sql @@ -6,7 +6,7 @@ select @metrics_sample_date(events.bucket_day) as metrics_sample_date, COUNT(distinct events.from_artifact_id) as amount from metrics.events_daily_to_artifact as events where event_type in ('CONTRACT_INVOCATION_SUCCESS_DAILY_COUNT') - and events.bucket_day BETWEEN @metrics_start(DATE) AND @metrics_end(DATE) + and events.bucket_day BETWEEN @metrics_start('DATE') AND @metrics_end('DATE') group by 1, metric, from_artifact_id, diff --git a/warehouse/metrics_mesh/oso_metrics/active_days.sql b/warehouse/metrics_mesh/oso_metrics/active_days.sql index 3e3506a70..ac1e86d51 100644 --- a/warehouse/metrics_mesh/oso_metrics/active_days.sql +++ b/warehouse/metrics_mesh/oso_metrics/active_days.sql @@ -6,7 +6,7 @@ select @metrics_sample_date(events.bucket_day) as metrics_sample_date, COUNT(DISTINCT events.bucket_day) amount from metrics.events_daily_to_artifact as events where event_type in @activity_event_types - and events.bucket_day BETWEEN @metrics_start(DATE) AND @metrics_end(DATE) + and events.bucket_day BETWEEN @metrics_start('DATE') AND @metrics_end('DATE') group by 1, metric, from_artifact_id, diff --git a/warehouse/metrics_mesh/oso_metrics/change_in_developers.sql b/warehouse/metrics_mesh/oso_metrics/change_in_developers.sql index e41d74372..4c4ea7275 100644 --- a/warehouse/metrics_mesh/oso_metrics/change_in_developers.sql +++ b/warehouse/metrics_mesh/oso_metrics/change_in_developers.sql @@ -7,15 +7,15 @@ WITH latest AS ( ), classification.metric, classification.amount - FROM metrics_peer_ref( + FROM @metrics_peer_ref( developer_classifications, - window := @rolling_window, - unit := @rolling_unit + window := @comparison_interval, + unit := 'day', ) as classification WHERE classification.metrics_sample_date = @relative_window_sample_date( - @metrics_end(DATE), - @rolling_window, - @rolling_unit, + @metrics_end('DATE'), + @comparison_interval, + 'day', 0 ) ), @@ -24,30 +24,31 @@ previous AS ( classification.event_source, @metrics_entity_type_col( 'to_{entity_type}_id', - table_alias := classification + table_alias := classification, + include_column_alias := true, ), classification.metric, classification.amount - FROM metrics_peer_ref( + FROM @metrics_peer_ref( developer_classifications, - window := @rolling_window, - unit := @rolling_unit + window := @comparison_interval, + unit := 'day' ) as classification WHERE classification.metrics_sample_date = @relative_window_sample_date( - @metrics_end(DATE), - @rolling_window, - @rolling_unit, + @metrics_end('DATE'), + @comparison_interval, + 'day', -1 ) ) -select @metrics_end(DATE) as metrics_sample_date, +select @metrics_end('DATE') as metrics_sample_date, COALESCE(latest.event_source, previous.event_source) as event_source, @metrics_entity_type_alias( COALESCE( @metrics_entity_type_col('to_{entity_type}_id', table_alias := latest), @metrics_entity_type_col('to_{entity_type}_id', table_alias := previous) ), - 'to_{entity_type}_id' + 'to_{entity_type}_id', ), '' as from_artifact_id, @metrics_name( diff --git 
a/warehouse/metrics_mesh/oso_metrics/commits.sql b/warehouse/metrics_mesh/oso_metrics/commits.sql index d8018f2dd..04585c3bb 100644 --- a/warehouse/metrics_mesh/oso_metrics/commits.sql +++ b/warehouse/metrics_mesh/oso_metrics/commits.sql @@ -6,7 +6,7 @@ select @metrics_sample_date(events.bucket_day) as metrics_sample_date, SUM(events.amount) as amount from metrics.events_daily_to_artifact as events where event_type in ('COMMIT_CODE') - and events.bucket_day BETWEEN @metrics_start(DATE) AND @metrics_end(DATE) + and events.bucket_day BETWEEN @metrics_start('DATE') AND @metrics_end('DATE') group by 1, metric, from_artifact_id, diff --git a/warehouse/metrics_mesh/oso_metrics/contributor_activity_classification.sql b/warehouse/metrics_mesh/oso_metrics/contributor_activity_classification.sql index 7982384d2..5691b9315 100644 --- a/warehouse/metrics_mesh/oso_metrics/contributor_activity_classification.sql +++ b/warehouse/metrics_mesh/oso_metrics/contributor_activity_classification.sql @@ -1,33 +1,46 @@ select active.metrics_sample_date, active.event_source, - @metrics_entity_type_col('to_{entity_type}_id', table_alias := active), + @metrics_entity_type_col( + 'to_{entity_type}_id', + table_alias := active, + include_column_alias := true, + ), '' as from_artifact_id, @metric_name('full_time_contributors') as metric, COUNT(DISTINCT active.from_artifact_id) as amount -from metrics_peer_ref( +from @metrics_peer_ref( developer_active_days, window := @rolling_window, unit := @rolling_unit ) as active where active.amount / @rolling_window >= @full_time_ratio + and active.metrics_sample_date = @metrics_end('DATE') group by metric, from_artifact_id, - @metrics_entity_type_col('to_{entity_type}_id', table_alias := active), + @metrics_entity_type_col( + 'to_{entity_type}_id', + table_alias := active, + ), event_source, metrics_sample_date union all select active.metrics_sample_date, active.event_source, - @metrics_entity_type_col('to_{entity_type}_id', table_alias := active), + @metrics_entity_type_col( + 'to_{entity_type}_id', + table_alias := active, + include_column_alias := true, + ), '' as from_artifact_id, @metric_name('part_time_contributors') as metric, COUNT(DISTINCT active.from_artifact_id) as amount -from metrics_peer_ref( +from @metrics_peer_ref( developer_active_days, window := @rolling_window, unit := @rolling_unit ) as active where active.amount / @rolling_window < @full_time_ratio + and active.metrics_sample_date = @metrics_end('DATE') group by metric, from_artifact_id, @metrics_entity_type_col('to_{entity_type}_id', table_alias := active), @@ -36,15 +49,20 @@ group by metric, union all select active.metrics_sample_date, active.event_source, - @metrics_entity_type_col('to_{entity_type}_id', table_alias := active), + @metrics_entity_type_col( + 'to_{entity_type}_id', + table_alias := active, + include_column_alias := true, + ), '' as from_artifact_id, @metric_name('active_contributors') as metric, COUNT(DISTINCT active.from_artifact_id) as amount -from metrics_peer_ref( +from @metrics_peer_ref( developer_active_days, window := @rolling_window, unit := @rolling_unit ) as active +where active.metrics_sample_date = @metrics_end('DATE') group by metric, from_artifact_id, @metrics_entity_type_col('to_{entity_type}_id', table_alias := active), diff --git a/warehouse/metrics_mesh/oso_metrics/developer_activity_classification.sql b/warehouse/metrics_mesh/oso_metrics/developer_activity_classification.sql index 6f85e2c31..9dcae9bf9 100644 --- 
a/warehouse/metrics_mesh/oso_metrics/developer_activity_classification.sql +++ b/warehouse/metrics_mesh/oso_metrics/developer_activity_classification.sql @@ -1,33 +1,46 @@ select active.metrics_sample_date, active.event_source, - @metrics_entity_type_col('to_{entity_type}_id', table_alias := active), + @metrics_entity_type_col( + 'to_{entity_type}_id', + table_alias := active, + include_column_alias := true, + ), '' as from_artifact_id, @metric_name('full_time_developers') as metric, COUNT(DISTINCT active.from_artifact_id) as amount -from metrics_peer_ref( +from @metrics_peer_ref( developer_active_days, window := @rolling_window, unit := @rolling_unit ) as active where active.amount / @rolling_window >= @full_time_ratio + and active.metrics_sample_date = @metrics_end('DATE') group by metric, from_artifact_id, - @metrics_entity_type_col('to_{entity_type}_id', table_alias := active), + @metrics_entity_type_col( + 'to_{entity_type}_id', + table_alias := active, + ), event_source, metrics_sample_date union all select active.metrics_sample_date, active.event_source, - @metrics_entity_type_col('to_{entity_type}_id', table_alias := active), + @metrics_entity_type_col( + 'to_{entity_type}_id', + table_alias := active, + include_column_alias := true, + ), '' as from_artifact_id, @metric_name('part_time_developers') as metric, COUNT(DISTINCT active.from_artifact_id) as amount -from metrics_peer_ref( +from @metrics_peer_ref( developer_active_days, window := @rolling_window, unit := @rolling_unit ) as active where active.amount / @rolling_window < @full_time_ratio + and active.metrics_sample_date = @metrics_end('DATE') group by metric, from_artifact_id, @metrics_entity_type_col('to_{entity_type}_id', table_alias := active), @@ -36,11 +49,15 @@ group by metric, union all select active.metrics_sample_date, active.event_source, - @metrics_entity_type_col('to_{entity_type}_id', table_alias := active), + @metrics_entity_type_col( + 'to_{entity_type}_id', + table_alias := active, + include_column_alias := true, + ), '' as from_artifact_id, @metric_name('active_developers') as metric, COUNT(DISTINCT active.from_artifact_id) as amount -from metrics_peer_ref( +from @metrics_peer_ref( developer_active_days, window := @rolling_window, unit := @rolling_unit diff --git a/warehouse/metrics_mesh/oso_metrics/forks.sql b/warehouse/metrics_mesh/oso_metrics/forks.sql index 3fa6fcdf5..f4b73cc5c 100644 --- a/warehouse/metrics_mesh/oso_metrics/forks.sql +++ b/warehouse/metrics_mesh/oso_metrics/forks.sql @@ -6,7 +6,7 @@ select @metrics_sample_date(events.bucket_day) as metrics_sample_date, SUM(events.amount) as amount, from metrics.events_daily_to_artifact as events where event_type in ('FORKED') - and events.bucket_day BETWEEN @metrics_start(DATE) AND @metrics_end(DATE) + and events.bucket_day BETWEEN @metrics_start('DATE') AND @metrics_end('DATE') group by 1, metric, from_artifact_id, diff --git a/warehouse/metrics_mesh/oso_metrics/gas_fees.sql b/warehouse/metrics_mesh/oso_metrics/gas_fees.sql index ba3f0e255..ccf1e03d2 100644 --- a/warehouse/metrics_mesh/oso_metrics/gas_fees.sql +++ b/warehouse/metrics_mesh/oso_metrics/gas_fees.sql @@ -6,7 +6,7 @@ select @metrics_sample_date(events.bucket_day) as metrics_sample_date, SUM(events.amount) as amount from metrics.events_daily_to_artifact as events where event_type in ('CONTRACT_INVOCATION_DAILY_L2_GAS_USED') - and events.bucket_day BETWEEN @metrics_start(DATE) AND @metrics_end(DATE) + and events.bucket_day BETWEEN @metrics_start('DATE') AND @metrics_end('DATE') group by 1, 
metric, from_artifact_id, diff --git a/warehouse/metrics_mesh/oso_metrics/issues_closed.sql b/warehouse/metrics_mesh/oso_metrics/issues_closed.sql index a0609b88c..7a6571ae5 100644 --- a/warehouse/metrics_mesh/oso_metrics/issues_closed.sql +++ b/warehouse/metrics_mesh/oso_metrics/issues_closed.sql @@ -6,7 +6,7 @@ select @metrics_sample_date(events.bucket_day) as metrics_sample_date, SUM(events.amount) as amount from metrics.events_daily_to_artifact as events where event_type in ('ISSUE_CLOSED') - and events.bucket_day BETWEEN @metrics_start(DATE) AND @metrics_end(DATE) + and events.bucket_day BETWEEN @metrics_start('DATE') AND @metrics_end('DATE') group by 1, metric, from_artifact_id, diff --git a/warehouse/metrics_mesh/oso_metrics/issues_opened.sql b/warehouse/metrics_mesh/oso_metrics/issues_opened.sql index 124b4c573..88cedbeb6 100644 --- a/warehouse/metrics_mesh/oso_metrics/issues_opened.sql +++ b/warehouse/metrics_mesh/oso_metrics/issues_opened.sql @@ -6,7 +6,7 @@ select @metrics_sample_date(events.bucket_day) as metrics_sample_date, SUM(events.amount) as amount from metrics.events_daily_to_artifact as events where event_type in ('ISSUE_OPENED') - and events.bucket_day BETWEEN @metrics_start(DATE) AND @metrics_end(DATE) + and events.bucket_day BETWEEN @metrics_start('DATE') AND @metrics_end('DATE') group by 1, metric, from_artifact_id, diff --git a/warehouse/metrics_mesh/oso_metrics/prs_merged.sql b/warehouse/metrics_mesh/oso_metrics/prs_merged.sql index 55bc6fbe7..e04283d7b 100644 --- a/warehouse/metrics_mesh/oso_metrics/prs_merged.sql +++ b/warehouse/metrics_mesh/oso_metrics/prs_merged.sql @@ -6,7 +6,7 @@ select @metrics_sample_date(events.bucket_day) as metrics_sample_date, SUM(events.amount) as amount from metrics.events_daily_to_artifact as events where event_type in ('PULL_REQUEST_MERGED') - and events.bucket_day BETWEEN @metrics_start(DATE) AND @metrics_end(DATE) + and events.bucket_day BETWEEN @metrics_start('DATE') AND @metrics_end('DATE') group by 1, metric, from_artifact_id, diff --git a/warehouse/metrics_mesh/oso_metrics/prs_opened.sql b/warehouse/metrics_mesh/oso_metrics/prs_opened.sql index d440066fc..f4b21b005 100644 --- a/warehouse/metrics_mesh/oso_metrics/prs_opened.sql +++ b/warehouse/metrics_mesh/oso_metrics/prs_opened.sql @@ -6,7 +6,7 @@ select @metrics_sample_date(events.bucket_day) as metrics_sample_date, SUM(events.amount) as amount from metrics.events_daily_to_artifact as events where event_type in ('PULL_REQUEST_OPENED') - and events.bucket_day BETWEEN @metrics_start(DATE) AND @metrics_end(DATE) + and events.bucket_day BETWEEN @metrics_start('DATE') AND @metrics_end('DATE') group by 1, metric, from_artifact_id, diff --git a/warehouse/metrics_mesh/oso_metrics/stars.sql b/warehouse/metrics_mesh/oso_metrics/stars.sql index 1ac8532aa..d6930516f 100644 --- a/warehouse/metrics_mesh/oso_metrics/stars.sql +++ b/warehouse/metrics_mesh/oso_metrics/stars.sql @@ -6,7 +6,7 @@ select @metrics_sample_date(events.bucket_day) as metrics_sample_date, SUM(events.amount) as amount from metrics.events_daily_to_artifact as events where event_type in ('STARRED') - and events.bucket_day BETWEEN @metrics_start(DATE) AND @metrics_end(DATE) + and events.bucket_day BETWEEN @metrics_start('DATE') AND @metrics_end('DATE') group by 1, metric, from_artifact_id, diff --git a/warehouse/metrics_mesh/oso_metrics/transactions.sql b/warehouse/metrics_mesh/oso_metrics/transactions.sql index 2ff6d1ead..23f134cca 100644 --- a/warehouse/metrics_mesh/oso_metrics/transactions.sql +++ 
b/warehouse/metrics_mesh/oso_metrics/transactions.sql @@ -6,7 +6,7 @@ select @metrics_sample_date(events.bucket_day) as metrics_sample_date, SUM(events.amount) as amount from metrics.events_daily_to_artifact as events where event_type in ('CONTRACT_INVOCATION_SUCCESS_DAILY_COUNT') - and events.bucket_day BETWEEN @metrics_start(DATE) AND @metrics_end(DATE) + and events.bucket_day BETWEEN @metrics_start('DATE') AND @metrics_end('DATE') group by 1, metric, from_artifact_id, diff --git a/warehouse/metrics_tools/README.md b/warehouse/metrics_tools/README.md index e61911b24..ce280e035 100644 --- a/warehouse/metrics_tools/README.md +++ b/warehouse/metrics_tools/README.md @@ -1,15 +1,8 @@ # metrics_tools -This is a set of tools for sqlmesh to generate metrics based on our data. This -is NOT in the `metrics_mesh` directory because we actually don't want this to -live in that folder. The reason for this is because when sqlmesh loads python -environments into the state connection it attempts to serialize python and that -can make writing tools for it a little difficult. Particularly one of it's -serializers will attempt to store everything that is adjacent to a file that is -generating a model. So, in order to get around this we must instead ensure that -the `@model` decorator is called only within the `metrics_mesh` directory. -However all other orchestration should remain separate of that. Eventually what -we should do is instead override the `model` decorator with our own decorator -that ensures that sqlmesh uses an imported tool. This might not work for others -that use sqlmesh but it will work fine for us and our monorepo and our -deployment style with sqlmesh. +These are sets of tools to create our generated metrics models within sqlmesh. + +The process for the metric model generation is currently as follows: + +- Parse configuration +- Intermediate transformation diff --git a/warehouse/metrics_tools/compute/worker.py b/warehouse/metrics_tools/compute/worker.py new file mode 100644 index 000000000..9cd2959a1 --- /dev/null +++ b/warehouse/metrics_tools/compute/worker.py @@ -0,0 +1,71 @@ +# The worker initialization +import abc +from graphql import ExecutionContext +import pandas as pd +import typing as t +import duckdb +from datetime import datetime +from sqlglot import exp +from dask.distributed import WorkerPlugin, Worker + + +class DuckDBWorkerInterface(abc.ABC): + def fetchdf(self, query: str) -> pd.DataFrame: + raise NotImplementedError("fetchdf not implemented") + + +class MetricsWorkerPlugin(WorkerPlugin): + def __init__(self, duckdb_path: str): + self._duckdb_path = duckdb_path + self._conn = None + + def setup(self, worker: Worker): + self._conn = duckdb.connect(self._duckdb_path) + + # Connect to iceberg if this is a remote worker + self._conn.sql( + """ + INSTALL iceberg; + LOAD iceberg; + + CREATE SECRET secret1 ( + TYPE GCS, + PROVIDER CREDENTIAL_CHAIN + ) + CREATE SCHEMA IF NOT EXISTS sources; + CREATE TABLE IF NOT EXISTS sources.cache_status ( + table_name VARCHAR PRIMARY KEY, + version VARCHAR, + is_locked BOOLEAN + ); + """ + ) + + def teardown(self, worker: Worker): + if self._conn: + self._conn.close() + + @property + def connection(self): + assert self._conn is not None + return self._conn + + def wait_for_cache(self, table: str): + """Checks if a table is cached in the local duckdb""" + self.connection.sql( + f""" + SELECT * FROM {table} + """ + ) + + +def batch_metrics_query( + query: exp.Expression, + context: ExecutionContext, + start: datetime, + end: datetime, + 
execution_time: datetime, + **kwargs: t.Any, +): + """Yield batches of dataframes""" + pass diff --git a/warehouse/metrics_tools/compute/wrapper.py b/warehouse/metrics_tools/compute/wrapper.py new file mode 100644 index 000000000..d8a10088d --- /dev/null +++ b/warehouse/metrics_tools/compute/wrapper.py @@ -0,0 +1,13 @@ +# A very basic cli or function wrapper that starts a dask cluster and injects an +# environment variable for it that sqlmesh can use. + +import subprocess +import sys + + +def cli(): + subprocess.run(sys.argv[1:]) + + +if __name__ == "__main__": + cli() diff --git a/warehouse/metrics_tools/lib/factories/definition.py b/warehouse/metrics_tools/definition.py similarity index 90% rename from warehouse/metrics_tools/lib/factories/definition.py rename to warehouse/metrics_tools/definition.py index 00f123ed8..76893822b 100644 --- a/warehouse/metrics_tools/lib/factories/definition.py +++ b/warehouse/metrics_tools/definition.py @@ -10,16 +10,15 @@ from sqlmesh.core.macros import MacroEvaluator from sqlmesh.utils.date import TimeLike -from metrics_tools.dialect.translate import ( +from .dialect.translate import ( CustomFuncHandler, CustomFuncRegistry, ) -from metrics_tools.evaluator import FunctionsTransformer +from .evaluator import FunctionsTransformer +from .utils import exp_literal_to_py_literal CURR_DIR = os.path.dirname(__file__) -QUERIES_DIR = os.path.abspath( - os.path.join(CURR_DIR, "../../../metrics_mesh/oso_metrics") -) +QUERIES_DIR = os.path.abspath(os.path.join(CURR_DIR, "../metrics_mesh/oso_metrics")) type ExtraVarBaseType = str | int | float type ExtraVarType = ExtraVarBaseType | t.List[ExtraVarBaseType] @@ -147,7 +146,7 @@ def time_suffix( unit: t.Optional[str], ): if window: - return f"over_{window}_{unit}" + return f"over_{window}_{unit}_window" if time_aggregation: return time_aggregation @@ -177,13 +176,16 @@ class MetricQueryDef: is_intermediate: bool = False - @property - def raw_sql(self): - return open(os.path.join(QUERIES_DIR, self.ref)).read() + enabled: bool = True + + def raw_sql(self, queries_dir: str): + return open(os.path.join(queries_dir, self.ref)).read() - def load_exp(self, default_dialect: str) -> t.List[exp.Expression]: + def load_exp( + self, queries_dir: str, default_dialect: str + ) -> t.List[exp.Expression]: """Loads the queries sql file as a sqlglot expression""" - raw_sql = self.raw_sql + raw_sql = self.raw_sql(queries_dir) dialect = self.dialect or default_dialect return t.cast( @@ -246,13 +248,6 @@ def resolve_table_name( return model_name -def exp_literal_to_py_literal(glot_literal: exp.Expression) -> t.Any: - # Don't error by default let it pass - if not isinstance(glot_literal, exp.Literal): - return glot_literal - return glot_literal.this - - class PeerRefRelativeWindowHandler(CustomFuncHandler): pass @@ -343,8 +338,15 @@ def evaluate( class MetricQuery: @classmethod - def load(cls, *, name: str, default_dialect: str, source: MetricQueryDef): - subquery = cls(name, source, source.load_exp(default_dialect)) + def load( + cls, + *, + name: str, + default_dialect: str, + source: MetricQueryDef, + queries_dir: str, + ): + subquery = cls(name, source, source.load_exp(queries_dir, default_dialect)) subquery.validate() return subquery @@ -359,7 +361,7 @@ def __init__( self._expressions = expressions def validate(self): - queries = find_select_expressions(self._expressions) + queries = find_query_expressions(self._expressions) if len(queries) != 1: raise Exception( f"There must only be a single query expression in metrics query 
{self._source.ref}" @@ -367,7 +369,7 @@ def validate(self): @property def query_expression(self) -> exp.Query: - return t.cast(exp.Query, find_select_expressions(self._expressions)[0]) + return t.cast(exp.Query, find_query_expressions(self._expressions)[0]) def expression_context(self): return MetricQueryContext(self._source, self._expressions[:]) @@ -376,6 +378,10 @@ def expression_context(self): def reference_name(self): return self._name + @property + def vars(self): + return self._source.vars or {} + def table_name(self, ref: PeerMetricDependencyRef): name = self._source.name or self._name return reference_to_str(ref, name) @@ -431,6 +437,15 @@ def is_intermediate(self): def provided_dependency_refs(self): return self.generate_dependency_refs_for_name(self.reference_name) + @property + def metric_type(self): + if self._source.time_aggregations is not None: + return "time_aggregation" + elif self._source.rolling is not None: + return "rolling" + # This _shouldn't_ happen + raise Exception("unknown metric type") + def generate_query_ref( self, ref: PeerMetricDependencyRef, @@ -724,16 +739,7 @@ def generate_collection_query( return top_level_select -# def generate_models_for_metric_query(name: str, query_def: MetricQueryDef): -# tables: t.Dict[str, str] = {} -# query_def_as_dict = query_def.to_input() -# if "artifact" in query_def.entity_types: - -# @model( -# name=query_def.resolve_table_name(name, "artifact"), - - -def find_select_expressions(expressions: t.List[exp.Expression]): +def find_query_expressions(expressions: t.List[exp.Expression]): return list(filter(lambda a: isinstance(a, exp.Query), expressions)) @@ -744,40 +750,6 @@ class DailyTimeseriesRollingWindowOptions(t.TypedDict): model_options: t.NotRequired[t.Dict[str, t.Any]] -# def generate_models_for_metric_query(name: str, query_def: MetricQueryDef): -# tables: t.Dict[str, str] = {} -# query_def_as_dict = query_def.to_input() -# if "artifact" in query_def.entity_types: - -# @model( -# name=query_def.resolve_table_name(name, "artifact"), -# is_sql=True, -# kind={ -# "name": ModelKindName.INCREMENTAL_BY_TIME_RANGE, -# "time_column": "bucket_day", -# "batch_size": 1, -# }, -# dialect="clickhouse", -# columns={ -# "bucket_day": exp.DataType.build("DATE", dialect="clickhouse"), -# "event_source": exp.DataType.build("String", dialect="clickhouse"), -# "to_artifact_id": exp.DataType.build("String", dialect="clickhouse"), -# "from_artifact_id": exp.DataType.build("String", dialect="clickhouse"), -# "metric": exp.DataType.build("String", dialect="clickhouse"), -# "amount": exp.DataType.build("Float64", dialect="clickhouse"), -# }, -# grain=["metric", "to_artifact_id", "from_artifact_id", "bucket_day"], -# ) -# def _generated_to_artifact_models(): -# pass - -# if "project" in query_def.entity_types: -# pass - -# if "collection" in query_def.entity_types: -# pass - - def join_all_of_entity_type( evaluator: MacroEvaluator, *, db: str, tables: t.List[str], columns: t.List[str] ): @@ -825,6 +797,7 @@ class TimeseriesMetricsOptions(t.TypedDict): model_options: t.NotRequired[t.Dict[str, t.Any]] start: TimeLike timeseries_sources: t.NotRequired[t.Optional[t.List[str]]] + queries_dir: t.NotRequired[t.Optional[str]] class GeneratedArtifactConfig(t.TypedDict): @@ -850,6 +823,7 @@ def generated_entity( name=query_reference_name, default_dialect=default_dialect, source=query_def, + queries_dir=QUERIES_DIR, ) peer_table_map = dict(peer_table_tuples) e = query.generate_query_ref( diff --git a/warehouse/metrics_tools/lib/factories/__init__.py 
b/warehouse/metrics_tools/factory/__init__.py similarity index 61% rename from warehouse/metrics_tools/lib/factories/__init__.py rename to warehouse/metrics_tools/factory/__init__.py index fbc707eb4..f0caa6c32 100644 --- a/warehouse/metrics_tools/lib/factories/__init__.py +++ b/warehouse/metrics_tools/factory/__init__.py @@ -1,4 +1,4 @@ # ruff: noqa: F403 from .factory import * -from .definition import * +from ..definition import * diff --git a/warehouse/metrics_tools/factory/factory.py b/warehouse/metrics_tools/factory/factory.py new file mode 100644 index 000000000..a9c4227f9 --- /dev/null +++ b/warehouse/metrics_tools/factory/factory.py @@ -0,0 +1,696 @@ +import contextlib +import inspect +import logging +import os +from queue import PriorityQueue +import typing as t +import textwrap +from dataclasses import dataclass, field + +from sqlmesh.core.macros import MacroEvaluator +from sqlmesh.utils.date import TimeLike +from sqlmesh.core.model import ModelKindName +import sqlglot as sql +from sqlglot import exp + +from metrics_tools.joiner import JoinerTransform +from metrics_tools.transformer import ( + SQLTransformer, + IntermediateMacroEvaluatorTransform, +) +from metrics_tools.transformer.qualify import QualifyTransform +from metrics_tools.definition import ( + MetricQuery, + PeerMetricDependencyRef, + TimeseriesMetricsOptions, + reference_to_str, +) +from metrics_tools.models import GeneratedModel +from metrics_tools.factory.macros import ( + metrics_end, + metrics_entity_type_col, + metrics_name, + metrics_sample_date, + metrics_start, + relative_window_sample_date, + metrics_entity_type_alias, + metrics_peer_ref, +) + +logger = logging.getLogger(__name__) + +type ExtraVarBaseType = str | int | float +type ExtraVarType = ExtraVarBaseType | t.List[ExtraVarBaseType] + +CURR_DIR = os.path.dirname(__file__) +QUERIES_DIR = os.path.abspath(os.path.join(CURR_DIR, "../../metrics_mesh/oso_metrics")) + +TIME_AGGREGATION_TO_CRON = { + "daily": "@daily", + "monthly": "@monthly", + "weekly": "@weekly", +} +METRICS_COLUMNS_BY_ENTITY: t.Dict[str, t.Dict[str, exp.DataType]] = { + "artifact": { + "metrics_sample_date": exp.DataType.build("DATE", dialect="clickhouse"), + "event_source": exp.DataType.build("String", dialect="clickhouse"), + "to_artifact_id": exp.DataType.build("String", dialect="clickhouse"), + "from_artifact_id": exp.DataType.build("String", dialect="clickhouse"), + "metric": exp.DataType.build("String", dialect="clickhouse"), + "amount": exp.DataType.build("Float64", dialect="clickhouse"), + }, + "project": { + "metrics_sample_date": exp.DataType.build("DATE", dialect="clickhouse"), + "event_source": exp.DataType.build("String", dialect="clickhouse"), + "to_project_id": exp.DataType.build("String", dialect="clickhouse"), + "from_artifact_id": exp.DataType.build("String", dialect="clickhouse"), + "metric": exp.DataType.build("String", dialect="clickhouse"), + "amount": exp.DataType.build("Float64", dialect="clickhouse"), + }, + "collection": { + "metrics_sample_date": exp.DataType.build("DATE", dialect="clickhouse"), + "event_source": exp.DataType.build("String", dialect="clickhouse"), + "to_collection_id": exp.DataType.build("String", dialect="clickhouse"), + "from_artifact_id": exp.DataType.build("String", dialect="clickhouse"), + "metric": exp.DataType.build("String", dialect="clickhouse"), + "amount": exp.DataType.build("Float64", dialect="clickhouse"), + }, +} + + +def generate_metric_models( + calling_file: str, + query: MetricQuery, + default_dialect: str, + peer_table_map: 
t.Dict[str, str],
+    start: TimeLike,
+    timeseries_sources: t.List[str],
+):
+    # Turn the source into a dict so it can be used in the sqlmesh context
+    refs = query.provided_dependency_refs
+
+    all_tables: t.Dict[str, t.List[str]] = {
+        "artifact": [],
+        "project": [],
+        "collection": [],
+    }
+
+    for ref in refs:
+        cron = "@daily"
+        time_aggregation = ref.get("time_aggregation")
+        window = ref.get("window")
+        if time_aggregation:
+            cron = TIME_AGGREGATION_TO_CRON[time_aggregation]
+        else:
+            if not window:
+                raise Exception("window or time_aggregation must be set")
+            assert query._source.rolling
+            cron = query._source.rolling["cron"]
+
+        table_name = query.table_name(ref)
+        all_tables[ref["entity_type"]].append(table_name)
+        columns = METRICS_COLUMNS_BY_ENTITY[ref["entity_type"]]
+        additional_macros = [
+            metrics_peer_ref,
+            metrics_entity_type_col,
+            metrics_entity_type_alias,
+            relative_window_sample_date,
+            (metrics_name, ["metric_name"]),
+        ]
+
+        kind_common = {"batch_size": 1}
+        partitioned_by = ("day(metrics_sample_date)",)
+
+        # Due to how the schedulers work for sqlmesh we actually can't batch if
+        # we're using a weekly cron for a time aggregation. In order to have
+        # this work we just adjust the start/end time for the
+        # metrics_start/metrics_end and also give a large enough batch time to
+        # fit a few weeks. This ensures there's no missing data.
+        if time_aggregation == "weekly":
+            kind_common = {"batch_size": 182, "lookback": 7}
+        if time_aggregation == "monthly":
+            kind_common = {"batch_size": 6}
+            partitioned_by = ("month(metrics_sample_date)",)
+        if time_aggregation == "daily":
+            kind_common = {"batch_size": 180}
+
+        evaluator_variables: t.Dict[str, t.Any] = {
+            "entity_type": ref["entity_type"],
+            "time_aggregation": ref.get("time_aggregation", None),
+            "rolling_window": ref.get("window", None),
+            "rolling_unit": ref.get("unit", None),
+        }
+        evaluator_variables.update(query.vars)
+
+        transformer = SQLTransformer(
+            disable_qualify=True,
+            transforms=[
+                IntermediateMacroEvaluatorTransform(
+                    additional_macros,
+                    variables=evaluator_variables,
+                ),
+                JoinerTransform(
+                    ref["entity_type"],
+                ),
+            ],
+        )
+
+        rendered_query = transformer.transform([query.query_expression])
+        logger.debug(rendered_query)
+
+        if ref["entity_type"] == "artifact":
+            GeneratedModel.create(
+                func=generated_query,
+                source="",
+                entrypoint_path=calling_file,
+                config={},
+                name=f"metrics.{table_name}",
+                kind={
+                    "name": ModelKindName.INCREMENTAL_BY_TIME_RANGE,
+                    "time_column": "metrics_sample_date",
+                    **kind_common,
+                },
+                dialect="clickhouse",
+                columns=columns,
+                grain=[
+                    "metric",
+                    "to_artifact_id",
+                    "from_artifact_id",
+                    "metrics_sample_date",
+                ],
+                cron=cron,
+                start=start,
+                additional_macros=additional_macros,
+                partitioned_by=partitioned_by,
+            )
+
+        if ref["entity_type"] == "project":
+            GeneratedModel.create(
+                func=generated_query,
+                source="",
+                entrypoint_path=calling_file,
+                config={},
+                name=f"metrics.{table_name}",
+                kind={
+                    "name": ModelKindName.INCREMENTAL_BY_TIME_RANGE,
+                    "time_column": "metrics_sample_date",
+                    **kind_common,
+                },
+                dialect="clickhouse",
+                columns=columns,
+                grain=[
+                    "metric",
+                    "to_project_id",
+                    "from_artifact_id",
+                    "metrics_sample_date",
+                ],
+                cron=cron,
+                start=start,
+                additional_macros=additional_macros,
+                partitioned_by=partitioned_by,
+            )
+        if ref["entity_type"] == "collection":
+            GeneratedModel.create(
+                func=generated_query,
+                source="",
+                entrypoint_path=calling_file,
+                config={},
+                name=f"metrics.{table_name}",
+                kind={
+                    "name": 
ModelKindName.INCREMENTAL_BY_TIME_RANGE, + "time_column": "metrics_sample_date", + **kind_common, + }, + dialect="clickhouse", + columns=columns, + grain=[ + "metric", + "to_collection_id", + "from_artifact_id", + "metrics_sample_date", + ], + cron=cron, + start=start, + additional_macros=additional_macros, + partitioned_by=partitioned_by, + ) + + return all_tables + + +@contextlib.contextmanager +def metric_ref_evaluator_context( + evaluator: MacroEvaluator, + ref: PeerMetricDependencyRef, + extra_vars: t.Optional[t.Dict[str, t.Any]] = None, +): + before = evaluator.locals.copy() + evaluator.locals.update(extra_vars or {}) + evaluator.locals.update( + { + "rolling_window": ref.get("window"), + "rolling_unit": ref.get("unit"), + "time_aggregation": ref.get("time_aggregation"), + "entity_type": ref.get("entity_type"), + } + ) + try: + yield + finally: + evaluator.locals = before + + +def generated_query( + evaluator: MacroEvaluator, + *, + rendered_query_str: str, + ref: PeerMetricDependencyRef, + table_name: str, + vars: t.Dict[str, t.Any], +): + from sqlmesh.core.dialect import parse_one + + with metric_ref_evaluator_context(evaluator, ref, vars): + result = evaluator.transform(parse_one(rendered_query_str)) + return result + + +class MetricQueryConfig(t.TypedDict): + table_name: str + ref: PeerMetricDependencyRef + rendered_query: exp.Expression + vars: t.Dict[str, t.Any] + query: MetricQuery + + +class MetricsCycle(Exception): + pass + + +class TimeseriesMetrics: + @classmethod + def from_raw_options(cls, **raw_options: t.Unpack[TimeseriesMetricsOptions]): + timeseries_sources = raw_options.get( + "timeseries_sources", ["events_daily_to_artifact"] + ) + assert timeseries_sources is not None + queries_dir = raw_options.get("queries_dir", QUERIES_DIR) + assert queries_dir is not None + + enabled_queries = filter( + lambda item: item[1].enabled, raw_options["metric_queries"].items() + ) + + metrics_queries = [ + MetricQuery.load( + name=name, + default_dialect=raw_options.get("default_dialect", "clickhouse"), + source=query_def, + queries_dir=queries_dir, + ) + for name, query_def in enabled_queries + ] + + # Build the dependency graph of all the metrics queries + peer_table_map: t.Dict[str, str] = {} + for query in metrics_queries: + provided_refs = query.provided_dependency_refs + for ref in provided_refs: + peer_table_map[reference_to_str(ref)] = query.table_name(ref) + + return cls(timeseries_sources, metrics_queries, peer_table_map, raw_options) + + def __init__( + self, + timeseries_sources: t.List[str], + metrics_queries: t.List[MetricQuery], + peer_table_map: t.Dict[str, str], + raw_options: TimeseriesMetricsOptions, + ): + marts_tables: t.Dict[str, t.List[str]] = { + "artifact": [], + "project": [], + "collection": [], + } + self._timeseries_sources = timeseries_sources + self._metrics_queries = metrics_queries + self._peer_table_map = peer_table_map + self._marts_tables = marts_tables + self._raw_options = raw_options + self._rendered = False + self._rendered_queries: t.Dict[str, MetricQueryConfig] = {} + + def generate_queries(self): + if self._rendered: + return self._rendered_queries + + queries: t.Dict[str, MetricQueryConfig] = {} + for query in self._metrics_queries: + queries.update( + self._generate_metrics_queries(query, self._peer_table_map, "metrics") + ) + self._rendered_queries = queries + self._rendered = True + return queries + + def _generate_metrics_queries( + self, + query: MetricQuery, + peer_table_map: t.Dict[str, str], + db_name: str, + ): + """Given a 
MetricQuery, generate all of the queries for its given dimensions"""
+        # Turn the source into a dict so it can be used in the sqlmesh context
+        refs = query.provided_dependency_refs
+
+        marts_tables = self._marts_tables
+
+        queries: t.Dict[str, MetricQueryConfig] = {}
+        for ref in refs:
+            table_name = query.table_name(ref)
+
+            if not query.is_intermediate:
+                marts_tables[ref["entity_type"]].append(table_name)
+
+            additional_macros = [
+                metrics_peer_ref,
+                metrics_entity_type_col,
+                metrics_entity_type_alias,
+                relative_window_sample_date,
+                (metrics_name, ["metric_name"]),
+            ]
+
+            evaluator_variables: t.Dict[str, t.Any] = {
+                "generated_metric_name": ref["name"],
+                "entity_type": ref["entity_type"],
+                "time_aggregation": ref.get("time_aggregation", None),
+                "rolling_window": ref.get("window", None),
+                "rolling_unit": ref.get("unit", None),
+                "$$peer_table_map": peer_table_map,
+                "$$peer_db": db_name,
+            }
+            evaluator_variables.update(query.vars)
+
+            transformer = SQLTransformer(
+                disable_qualify=True,
+                transforms=[
+                    IntermediateMacroEvaluatorTransform(
+                        additional_macros,
+                        variables=evaluator_variables,
+                    ),
+                    QualifyTransform(),
+                    JoinerTransform(
+                        ref["entity_type"],
+                    ),
+                ],
+            )
+
+            rendered_query = transformer.transform([query.query_expression])
+
+            assert rendered_query is not None
+            assert len(rendered_query) == 1
+            queries[table_name] = MetricQueryConfig(
+                table_name=table_name,
+                ref=ref,
+                rendered_query=rendered_query[0],
+                vars=query._source.vars or {},
+                query=query,
+            )
+        return queries
+
+    def generate_ordered_queries(self):
+        """Perform a topological sort on all the queries within metrics"""
+
+        @dataclass(order=True)
+        class MetricQueryConfigQueueItem:
+            depth: int
+            config: MetricQueryConfig = field(compare=False)
+
+        # hack for now. We actually need to resolve the queries to do proper
+        # ordering but this is mostly for testing.
+        sources = self._timeseries_sources
+        queries = self.generate_queries()
+
+        queue = PriorityQueue()
+
+        visited: t.Dict[str, int] = {}
+        cycle_lock: t.Dict[str, bool] = {}
+
+        def queue_query(name: str):
+            if name in cycle_lock:
+                raise MetricsCycle("Invalid metrics. 
Cycle detected") + + if name in visited: + return visited[name] + cycle_lock[name] = True + + query_config = queries[name] + rendered_query = query_config["rendered_query"] + depth = 0 + tables = rendered_query.find_all(exp.Table) + for table in tables: + db_name = table.db + if isinstance(table.db, exp.Identifier): + db_name = table.db.this + table_name = table.this.this + + if db_name != "metrics": + continue + + if table_name in sources: + continue + + try: + parent_depth = queue_query(table_name) + except MetricsCycle: + parent_query = queries[table_name]["rendered_query"] + raise MetricsCycle( + textwrap.dedent( + f"""Cycle from {name} to {table_name}: + --- + {name}: + {rendered_query.sql(dialect="duckdb", pretty=True)} + ---- + parent: + {parent_query.sql(dialect="duckdb", pretty=True)} + """ + ) + ) + if parent_depth + 1 > depth: + depth = parent_depth + 1 + queue.put(MetricQueryConfigQueueItem(depth, query_config)) + visited[name] = depth + del cycle_lock[name] + return depth + + for name in queries.keys(): + if visited.get(name) is None: + queue_query(name) + + while not queue.empty(): + item = t.cast(MetricQueryConfigQueueItem, queue.get()) + depth = item.depth + query_config = item.config + yield (depth, query_config) + + def generate_models(self, calling_file: str): + """Generates sqlmesh models for all the configured metrics definitions""" + # Generate the models + + for _, query_config in self.generate_ordered_queries(): + self.generate_model_for_rendered_query(calling_file, query_config) + + # Join all of the models of the same entity type into the same view model + for entity_type, tables in self._marts_tables.items(): + GeneratedModel.create( + func=join_all_of_entity_type, + entrypoint_path=calling_file, + config={ + "db": "metrics", + "tables": tables, + "columns": list(METRICS_COLUMNS_BY_ENTITY[entity_type].keys()), + }, + name=f"metrics.timeseries_metrics_to_{entity_type}", + kind="VIEW", + dialect="clickhouse", + start=self._raw_options["start"], + columns={ + k: METRICS_COLUMNS_BY_ENTITY[entity_type][k] + for k in filter( + lambda col: col not in ["event_source"], + METRICS_COLUMNS_BY_ENTITY[entity_type].keys(), + ) + }, + ) + print("model generation complete") + + def generate_model_for_rendered_query( + self, calling_file: str, query_config: MetricQueryConfig + ): + query = query_config["query"] + match query.metric_type: + case "rolling": + self.generate_rolling_model_for_rendered_query( + calling_file, query_config + ) + case "time_aggregation": + self.generate_time_aggregation_model_for_rendered_query( + calling_file, query_config + ) + + def generate_rolling_model_for_rendered_query( + self, calling_file: str, query_config: MetricQueryConfig + ): + """TODO change this to a python model""" + config = self.serializable_config(query_config) + + ref = query_config["ref"] + query = query_config["query"] + + columns = METRICS_COLUMNS_BY_ENTITY[ref["entity_type"]] + + kind_common = {"batch_size": 1} + partitioned_by = ("day(metrics_sample_date)",) + window = ref.get("window") + assert window is not None + assert query._source.rolling + cron = query._source.rolling["cron"] + + grain = [ + "metric", + f"to_{ref['entity_type']}_id", + "from_artifact_id", + "metrics_sample_date", + ] + + GeneratedModel.create( + func=generated_query, + entrypoint_path=calling_file, + config=config, + name=f"metrics.{query_config['table_name']}", + kind={ + "name": ModelKindName.INCREMENTAL_BY_TIME_RANGE, + "time_column": "metrics_sample_date", + **kind_common, + }, + 
dialect="clickhouse", + columns=columns, + grain=grain, + cron=cron, + start=self._raw_options["start"], + additional_macros=self.generated_model_additional_macros, + partitioned_by=partitioned_by, + ) + + def generate_time_aggregation_model_for_rendered_query( + self, calling_file: str, query_config: MetricQueryConfig + ): + """Generate model for time aggregation models""" + # Use a simple python sql model to generate the time_aggregation model + config = self.serializable_config(query_config) + + ref = query_config["ref"] + + columns = METRICS_COLUMNS_BY_ENTITY[ref["entity_type"]] + + time_aggregation = ref.get("time_aggregation") + assert time_aggregation is not None + + kind_options = {"batch_size": 180, "lookback": 7} + partitioned_by = ("day(metrics_sample_date)",) + + if time_aggregation == "weekly": + kind_options = {"batch_size": 182, "lookback": 7} + if time_aggregation == "monthly": + kind_options = {"batch_size": 6, "lookback": 1} + partitioned_by = ("month(metrics_sample_date)",) + + grain = [ + "metric", + f"to_{ref['entity_type']}_id", + "from_artifact_id", + "metrics_sample_date", + ] + cron = TIME_AGGREGATION_TO_CRON[time_aggregation] + + GeneratedModel.create( + func=generated_query, + entrypoint_path=calling_file, + config=config, + name=f"metrics.{query_config['table_name']}", + kind={ + "name": ModelKindName.INCREMENTAL_BY_TIME_RANGE, + "time_column": "metrics_sample_date", + **kind_options, + }, + dialect="clickhouse", + columns=columns, + grain=grain, + cron=cron, + start=self._raw_options["start"], + additional_macros=self.generated_model_additional_macros, + partitioned_by=partitioned_by, + ) + + def serializable_config(self, query_config: MetricQueryConfig): + # Use a simple python sql model to generate the time_aggregation model + config: t.Dict[str, t.Any] = t.cast(dict, query_config.copy()) + + # MetricQuery can't be serialized. Remove it. It's not needed + del config["query"] + # Apparently expressions also cannot be serialized. + del config["rendered_query"] + config["rendered_query_str"] = query_config["rendered_query"].sql( + dialect="duckdb" + ) + return config + + @property + def generated_model_additional_macros(self): + return [metrics_end, metrics_start, metrics_sample_date] + + +def timeseries_metrics( + **raw_options: t.Unpack[TimeseriesMetricsOptions], +): + calling_file = inspect.stack()[1].filename + timeseries_metrics = TimeseriesMetrics.from_raw_options(**raw_options) + return timeseries_metrics.generate_models(calling_file) + + +def join_all_of_entity_type( + evaluator: MacroEvaluator, *, db: str, tables: t.List[str], columns: t.List[str] +): + # A bit of a hack but we know we have a "metric" column. 
We want to + # transform this metric id to also include the event_source as a prefix to + # that metric id in the joined table + transformed_columns = [] + for column in columns: + if column == "event_source": + continue + if column == "metric": + transformed_columns.append( + exp.alias_( + exp.Concat( + expressions=[ + exp.to_column("event_source"), + exp.Literal(this="_", is_string=True), + exp.to_column(column), + ], + safe=False, + coalesce=False, + ), + alias="metric", + ) + ) + else: + transformed_columns.append(column) + + query = exp.select(*transformed_columns).from_(sql.to_table(f"{db}.{tables[0]}")) + for table in tables[1:]: + query = query.union( + exp.select(*transformed_columns).from_(sql.to_table(f"{db}.{table}")), + distinct=False, + ) + # Calculate the correct metric_id for all of the entity types + return query diff --git a/warehouse/metrics_tools/factory/fixtures/metrics/active_days.sql b/warehouse/metrics_tools/factory/fixtures/metrics/active_days.sql new file mode 100644 index 000000000..c3f61eb85 --- /dev/null +++ b/warehouse/metrics_tools/factory/fixtures/metrics/active_days.sql @@ -0,0 +1,15 @@ +-- Get the active days of a given user +select @metrics_sample_date(events.bucket_day) as metrics_sample_date, + events.event_source, + events.to_artifact_id, + events.from_artifact_id as from_artifact_id, + @metric_name() as metric, + COUNT(DISTINCT events.bucket_day) amount +from metrics.events_daily_to_artifact as events +where event_type in @activity_event_types + and events.bucket_day BETWEEN @metrics_start('DATE') AND @metrics_end('DATE') +group by 1, + metric, + from_artifact_id, + to_artifact_id, + event_source, \ No newline at end of file diff --git a/warehouse/metrics_tools/factory/fixtures/metrics/change_in_developers.sql b/warehouse/metrics_tools/factory/fixtures/metrics/change_in_developers.sql new file mode 100644 index 000000000..4c4ea7275 --- /dev/null +++ b/warehouse/metrics_tools/factory/fixtures/metrics/change_in_developers.sql @@ -0,0 +1,64 @@ +WITH latest AS ( + SELECT classification.metrics_sample_date, + classification.event_source, + @metrics_entity_type_col( + 'to_{entity_type}_id', + table_alias := classification + ), + classification.metric, + classification.amount + FROM @metrics_peer_ref( + developer_classifications, + window := @comparison_interval, + unit := 'day', + ) as classification + WHERE classification.metrics_sample_date = @relative_window_sample_date( + @metrics_end('DATE'), + @comparison_interval, + 'day', + 0 + ) +), +previous AS ( + SELECT classification.metrics_sample_date, + classification.event_source, + @metrics_entity_type_col( + 'to_{entity_type}_id', + table_alias := classification, + include_column_alias := true, + ), + classification.metric, + classification.amount + FROM @metrics_peer_ref( + developer_classifications, + window := @comparison_interval, + unit := 'day' + ) as classification + WHERE classification.metrics_sample_date = @relative_window_sample_date( + @metrics_end('DATE'), + @comparison_interval, + 'day', + -1 + ) +) +select @metrics_end('DATE') as metrics_sample_date, + COALESCE(latest.event_source, previous.event_source) as event_source, + @metrics_entity_type_alias( + COALESCE( + @metrics_entity_type_col('to_{entity_type}_id', table_alias := latest), + @metrics_entity_type_col('to_{entity_type}_id', table_alias := previous) + ), + 'to_{entity_type}_id', + ), + '' as from_artifact_id, + @metrics_name( + CONCAT( + 'change_in_', + COALESCE(previous.metric, latest.metric) + ) + ) as metric, + latest.amount - 
previous.amount as amount +FROM previous + LEFT JOIN latest ON latest.event_source = previous.event_source + AND @metrics_entity_type_col('to_{entity_type}_id', table_alias := latest) = @metrics_entity_type_col('to_{entity_type}_id', table_alias := previous) + AND latest.metric = previous.metric \ No newline at end of file diff --git a/warehouse/metrics_tools/factory/fixtures/metrics/developer_activity_classification.sql b/warehouse/metrics_tools/factory/fixtures/metrics/developer_activity_classification.sql new file mode 100644 index 000000000..c0eb7c3f8 --- /dev/null +++ b/warehouse/metrics_tools/factory/fixtures/metrics/developer_activity_classification.sql @@ -0,0 +1,70 @@ +select active.metrics_sample_date, + active.event_source, + @metrics_entity_type_col( + 'to_{entity_type}_id', + table_alias := active, + include_column_alias := true, + ), + '' as from_artifact_id, + @metric_name('full_time_developers') as metric, + COUNT(DISTINCT active.from_artifact_id) as amount +from @metrics_peer_ref( + developer_active_days, + window := @rolling_window, + unit := @rolling_unit + ) as active +where active.amount / @rolling_window >= @full_time_ratio + and active.metrics_sample_date = @metrics_end('DATE') +group by metric, + from_artifact_id, + @metrics_entity_type_col( + 'to_{entity_type}_id', + table_alias := active, + ), + event_source, + metrics_sample_date +union all +select active.metrics_sample_date, + active.event_source, + @metrics_entity_type_col( + 'to_{entity_type}_id', + table_alias := active, + include_column_alias := true, + ), + '' as from_artifact_id, + @metric_name('part_time_developers') as metric, + COUNT(DISTINCT active.from_artifact_id) as amount +from @metrics_peer_ref( + developer_active_days, + window := @rolling_window, + unit := @rolling_unit + ) as active +where active.amount / @rolling_window >= @full_time_ratio + and active.metrics_sample_date = @metrics_end('DATE') +group by metric, + from_artifact_id, + @metrics_entity_type_col('to_{entity_type}_id', table_alias := active), + event_source, + metrics_sample_date +union all +select active.metrics_sample_date, + active.event_source, + @metrics_entity_type_col( + 'to_{entity_type}_id', + table_alias := active, + include_column_alias := true, + ), + '' as from_artifact_id, + @metric_name('active_developers') as metric, + COUNT(DISTINCT active.from_artifact_id) as amount +from @metrics_peer_ref( + developer_active_days, + window := @rolling_window, + unit := @rolling_unit + ) as active +where active.metrics_sample_date = @metrics_end('DATE') +group by metric, + from_artifact_id, + @metrics_entity_type_col('to_{entity_type}_id', table_alias := active), + event_source, + metrics_sample_date \ No newline at end of file diff --git a/warehouse/metrics_tools/factory/fixtures/metrics/visits.sql b/warehouse/metrics_tools/factory/fixtures/metrics/visits.sql new file mode 100644 index 000000000..d19d26d55 --- /dev/null +++ b/warehouse/metrics_tools/factory/fixtures/metrics/visits.sql @@ -0,0 +1,14 @@ +select @metrics_sample_date(events.bucket_day) as metrics_sample_date, + events.event_source, + events.to_artifact_id, + '' as from_artifact_id, + @metric_name() as metric, + SUM(events.amount) as amount +from metrics.events_daily_to_artifact as events +where event_type in ('VISIT') + and events.bucket_day BETWEEN @metrics_start('DATE') AND @metrics_end('DATE') +group by 1, + metric, + from_artifact_id, + to_artifact_id, + event_source \ No newline at end of file diff --git a/warehouse/metrics_tools/factory/gen_data.py 
b/warehouse/metrics_tools/factory/gen_data.py new file mode 100644 index 000000000..3d979432a --- /dev/null +++ b/warehouse/metrics_tools/factory/gen_data.py @@ -0,0 +1,180 @@ +import os +import typing as t +import duckdb +import pandas as pd +import hashlib +import base64 +import arrow +from datetime import datetime + +METRICS_TOOLS_DB_FIXTURE_PATH = os.environ.get("METRICS_TOOLS_DB_FIXTURE_PATH", "") + + +class MetricsDBFixture: + @classmethod + def create(cls, encode_ids: bool = False): + database_path = ":memory:" + if METRICS_TOOLS_DB_FIXTURE_PATH: + database_path = os.path.join( + METRICS_TOOLS_DB_FIXTURE_PATH, + f"metrics-{arrow.now().strftime('%Y-%m-%d-%H-%M-%S')}.db", + ) + + conn = duckdb.connect(database=database_path) + + conn.execute("CREATE SCHEMA metrics") + conn.execute("CREATE SCHEMA sources") + + # Create the events table if it doesn't already exist + conn.execute( + """ + CREATE TABLE IF NOT EXISTS metrics.events_daily_to_artifact ( + bucket_day DATE, + event_type STRING, + event_source STRING, + from_artifact_id STRING, + to_artifact_id STRING, + amount FLOAT + ) + """ + ) + + # Create the artifacts_to_projects_v1 table if it doesn't already exist + conn.execute( + """ + CREATE TABLE IF NOT EXISTS sources.artifacts_by_project_v1 ( + artifact_id STRING, + artifact_source_id STRING, + artifact_source STRING, + artifact_namespace STRING, + artifact_name STRING, + project_id STRING, + project_source STRING, + project_namespace STRING, + project_name STRING + ) + """ + ) + + # Create the projects_to_collections_v1 table if it doesn't already exist + conn.execute( + """ + CREATE TABLE IF NOT EXISTS sources.projects_by_collection_v1 ( + project_id STRING, + project_source STRING, + project_namespace STRING, + project_name STRING, + collection_id STRING, + collection_source STRING, + collection_namespace STRING, + collection_name STRING + ) + """ + ) + + return cls(conn, encode_ids) + + def __init__(self, conn: duckdb.DuckDBPyConnection, encode_ids: bool): + self._conn = conn + self._encode_ids = encode_ids + + def encode_sha256_base64(self, input_str): + """Helper method to encode a string to base64 SHA256 hash.""" + if not self._encode_ids: + return input_str + sha256_hash = hashlib.sha256(input_str.encode()).digest() + return base64.b64encode(sha256_hash).decode() + + def generate_daily_events( + self, + start_ds: str, + end_ds: str, + event_type: str, + from_artifact: str, + to_artifact: str, + event_source: str = "TEST", + amount: float = 1.0, + date_filter: t.Callable[[pd.DataFrame], pd.DataFrame] = lambda a: a, + ): + # Convert string dates to datetime objects + start_date = datetime.strptime(start_ds, "%Y-%m-%d") + end_date = datetime.strptime(end_ds, "%Y-%m-%d") + date_range = pd.date_range(start=start_date, end=end_date) + + # Static values + + from_artifact_id = self.encode_sha256_base64(from_artifact) + to_artifact_id = self.encode_sha256_base64(to_artifact) + + # Create DataFrame + data = { + "bucket_day": date_range, + "event_type": event_type, + "event_source": event_source, + "from_artifact_id": from_artifact_id, + "to_artifact_id": to_artifact_id, + "amount": amount, + } + df = pd.DataFrame(data) # noqa: F841 + df = date_filter(df) + + # Insert data into DuckDB table + self._conn.execute( + "INSERT INTO metrics.events_daily_to_artifact SELECT * FROM df" + ) + + def populate_artifacts_and_projects( + self, projects: t.Dict[str, t.List[str]], collections: t.Dict[str, t.List[str]] + ): + """Populate artifacts_by_project_v1 and projects_by_collection_v1 tables.""" 
+ + # Prepare data for artifacts_by_project_v1 + artifacts_data = [] + for project_name, artifact_names in projects.items(): + project_id = self.encode_sha256_base64(project_name) + for artifact_name in artifact_names: + artifact_id = self.encode_sha256_base64(artifact_name) + artifacts_data.append( + { + "artifact_id": artifact_id, + "artifact_source_id": "TEST", + "artifact_source": "TEST", + "artifact_namespace": "TEST", + "artifact_name": artifact_name, + "project_id": project_id, + "project_source": "TEST", + "project_namespace": "TEST", + "project_name": project_name, + } + ) + + # Insert data into artifacts_by_project_v1 table + artifacts_df = pd.DataFrame(artifacts_data) # noqa: F841 + self._conn.execute( + "INSERT INTO sources.artifacts_by_project_v1 SELECT * FROM artifacts_df" + ) + + # Prepare data for projects_by_collection_v1 + collections_data = [] + for collection_name, project_names in collections.items(): + collection_id = self.encode_sha256_base64(collection_name) + for project_name in project_names: + project_id = self.encode_sha256_base64(project_name) + collections_data.append( + { + "project_id": project_id, + "project_source": "TEST", + "project_namespace": "TEST", + "project_name": project_name, + "collection_id": collection_id, + "collection_source": "TEST", + "collection_namespace": "TEST", + "collection_name": collection_name, + } + ) + + # Insert data into projects_by_collection_v1 table + collections_df = pd.DataFrame(collections_data) # noqa: F841 + self._conn.execute( + "INSERT INTO sources.projects_by_collection_v1 SELECT * FROM collections_df" + ) diff --git a/warehouse/metrics_tools/lib/factories/macros.py b/warehouse/metrics_tools/factory/macros.py similarity index 60% rename from warehouse/metrics_tools/lib/factories/macros.py rename to warehouse/metrics_tools/factory/macros.py index 6a25d1d31..af8bd113f 100644 --- a/warehouse/metrics_tools/lib/factories/macros.py +++ b/warehouse/metrics_tools/factory/macros.py @@ -2,10 +2,16 @@ import sqlglot from sqlglot import expressions as exp -from sqlmesh.core.dialect import MacroVar, parse_one +from sqlmesh.core.dialect import MacroVar from sqlmesh.core.macros import MacroEvaluator +from sqlmesh.core.dialect import parse_one -from .definition import time_suffix +from metrics_tools.definition import ( + PeerMetricDependencyRef, + time_suffix, + to_actual_table_name, +) +from metrics_tools.utils import exp_literal_to_py_literal def relative_window_sample_date( @@ -25,8 +31,8 @@ def relative_window_sample_date( must be a valid thing to subtract from. Also note, the base should generally be the `@metrics_end` date. 
""" - if evaluator.runtime_stage in ["loading", "creating"]: - return parse_one("STR_TO_DATE('1970-01-01', '%Y-%m-%d')") + # if evaluator.runtime_stage in ["loading", "creating"]: + # return parse_one("STR_TO_DATE('1970-01-01', '%Y-%m-%d')") if relative_index == 0: return base @@ -44,21 +50,20 @@ def relative_window_sample_date( else: unit = transformed.sql() - rel_index = 0 + converted_relative_index = 0 if isinstance(relative_index, exp.Literal): - rel_index = int(t.cast(int, relative_index.this)) - elif isinstance(relative_index, exp.Expression): - rel_index = int(relative_index.sql()) - + converted_relative_index = int(t.cast(int, relative_index.this)) + elif isinstance(relative_index, exp.Neg): + converted_relative_index = int(relative_index.this.this) * -1 interval_unit = exp.Var(this=unit) interval_delta = exp.Interval( this=exp.Mul( - this=exp.Literal(this=str(abs(rel_index)), is_string=False), + this=exp.Literal(this=str(abs(converted_relative_index)), is_string=False), expression=window, ), unit=interval_unit, ) - if relative_index > 0: + if converted_relative_index > 0: return exp.Add(this=base, expression=interval_delta) else: return exp.Sub(this=base, expression=interval_delta) @@ -67,7 +72,6 @@ def relative_window_sample_date( def time_aggregation_bucket( evaluator: MacroEvaluator, time_exp: exp.Expression, interval: str ): - from sqlmesh.core.dialect import parse_one if evaluator.runtime_stage in ["loading", "creating"]: return parse_one("STR_TO_DATE('1970-01-01', '%Y-%m-%d')") @@ -81,7 +85,7 @@ def time_aggregation_bucket( this="TIME_BUCKET", expressions=[ exp.Interval( - this=exp.Literal(this=1, is_string=False), + this=exp.Literal(this="1", is_string=False), unit=exp.Var(this=rollup_to_interval[interval]), ), exp.Cast( @@ -175,19 +179,51 @@ def metrics_start(evaluator: MacroEvaluator, _data_type: t.Optional[str] = None) start_date = t.cast( exp.Expression, evaluator.transform( - parse_one("STR_TO_DATE(@start_ds, '%Y-%m-%d')", dialect="clickhouse") + exp.StrToDate( + this=MacroVar(this="start_ds"), + format=exp.Literal(this="%Y-%m-%d", is_string=True), + ) ), ) return evaluator.transform( time_aggregation_bucket(evaluator, start_date, time_aggregation_interval) ) else: - return evaluator.transform( - parse_one( - "STR_TO_DATE(@end_ds, '%Y-%m-%d') - INTERVAL @rolling_window DAY", - dialect="clickhouse", + # We are documenting that devs do do date filtering with the `between` + # operator so the metrics_end value is inclusive. This means that we + # want to go back the rolling window - 1 + rolling_window = evaluator.locals.get("rolling_window") + if rolling_window is None: + raise Exception( + "metrics_start used in a non metrics model. Model was not supplied a rolling_window" + ) + else: + rolling_window = t.cast(int, rolling_window) + rolling_window = rolling_window - 1 + rolling_unit = evaluator.locals.get("rolling_unit", "") + if rolling_unit not in ["day", "month", "year", "week", "quarter"]: + raise Exception( + f'Invalid use of metrics_start. 
Cannot use rolling_unit="{rolling_unit}"' ) + + # Calculated rolling start + rolling_start = exp.Sub( + this=exp.Cast( + this=exp.StrToDate( + this=MacroVar(this="end_ds"), + format=exp.Literal(this="%Y-%m-%d", is_string=True), + ), + to=exp.DataType(this=exp.DataType.Type.DATETIME), + _type=exp.DataType(this=exp.DataType.Type.DATETIME), + ), + expression=exp.Interval( + this=exp.Paren( + this=exp.Literal(this=str(rolling_window), is_string=False), + ), + unit=exp.Var(this=rolling_unit.upper()), + ), ) + return evaluator.transform(rolling_start) def metrics_end(evaluator: MacroEvaluator, _data_type: t.Optional[str] = None): @@ -200,20 +236,35 @@ def metrics_end(evaluator: MacroEvaluator, _data_type: t.Optional[str] = None): "weekly": "week", "monthly": "month", } + time_agg_end = exp.Add( + this=exp.Cast( + this=exp.StrToDate( + this=MacroVar(this="end_ds"), + format=exp.Literal( + this="%Y-%m-%d", + is_string=True, + ), + ), + to=exp.DataType(this=exp.DataType.Type.DATETIME), + _type=exp.DataType(this=exp.DataType.Type.DATETIME), + ), + expression=exp.Interval( + this=exp.Literal(this="1", is_string=True), + unit=exp.Var(this=to_interval[time_aggregation_interval]), + ), + ) end_date = t.cast( exp.Expression, - evaluator.transform( - parse_one( - f"STR_TO_DATE(@end_ds, '%Y-%m-%d') + INTERVAL 1 {to_interval[time_aggregation_interval]}", - dialect="clickhouse", - ) - ), + time_agg_end, ) return evaluator.transform( time_aggregation_bucket(evaluator, end_date, time_aggregation_interval) ) return evaluator.transform( - parse_one("STR_TO_DATE(@end_ds, '%Y-%m-%d')", dialect="clickhouse") + exp.StrToDate( + this=MacroVar(this="end_ds"), + format=exp.Literal(this="%Y-%m-%d", is_string=True), + ) ) @@ -221,6 +272,7 @@ def metrics_entity_type_col( evaluator: MacroEvaluator, format_str: str, table_alias: exp.Expression | str | None = None, + include_column_alias: exp.Expression | bool = False, ): names = [] @@ -228,19 +280,23 @@ def metrics_entity_type_col( format_str = format_str.this if table_alias: - if isinstance(table_alias, exp.TableAlias): - names.append(table_alias.this) + if isinstance(table_alias, (exp.TableAlias, exp.Literal, exp.Column)): + if isinstance(table_alias.this, exp.Identifier): + names.append(table_alias.this.this) + else: + names.append(table_alias.this) elif isinstance(table_alias, str): names.append(table_alias) - elif isinstance(table_alias, exp.Literal): - names.append(table_alias.this) else: names.append(table_alias.sql()) column_name = format_str.format( entity_type=evaluator.locals.get("entity_type", "artifact") ) names.append(column_name) - return sqlglot.to_column(f"{'.'.join(names)}") + column = sqlglot.to_column(f"{'.'.join(names)}", quoted=True) + if include_column_alias: + return column.as_(column_name) + return column def metrics_entity_type_alias( @@ -252,3 +308,37 @@ def metrics_entity_type_alias( entity_type=evaluator.locals.get("entity_type", "artifact") ) return exp.alias_(to_alias, alias_name) + + +def metrics_peer_ref( + evaluator: MacroEvaluator, + name: str, + *, + entity_type: t.Optional[exp.Expression] = None, + window: t.Optional[exp.Expression] = None, + unit: t.Optional[exp.Expression] = None, + time_aggregation: t.Optional[exp.Expression] = None, +): + entity_type_val = ( + t.cast(str, exp_literal_to_py_literal(entity_type)) + if entity_type + else evaluator.locals.get("entity_type", "") + ) + window_val = int(exp_literal_to_py_literal(window)) if window else None + unit_val = t.cast(str, exp_literal_to_py_literal(unit)) if unit else None + 
time_aggregation_val = ( + t.cast(str, exp_literal_to_py_literal(time_aggregation)) + if time_aggregation + else None + ) + peer_db = t.cast(dict, evaluator.locals.get("$$peer_db")) + peer_table_map = t.cast(dict, evaluator.locals.get("$$peer_table_map")) + + ref = PeerMetricDependencyRef( + name=name, + entity_type=entity_type_val, + window=window_val, + unit=unit_val, + time_aggregation=time_aggregation_val, + ) + return exp.to_table(f"{peer_db}.{to_actual_table_name(ref, peer_table_map)}") diff --git a/warehouse/metrics_tools/factory/test_factory.py b/warehouse/metrics_tools/factory/test_factory.py new file mode 100644 index 000000000..d4bd06a0e --- /dev/null +++ b/warehouse/metrics_tools/factory/test_factory.py @@ -0,0 +1,269 @@ +from metrics_tools.utils.dataframes import as_pandas_df +from metrics_tools.utils.testing import duckdb_df_context +import os +import arrow +from metrics_tools.factory.gen_data import MetricsDBFixture +import pytest + +from metrics_tools.runner import MetricsRunner +from metrics_tools.definition import MetricQueryDef, RollingConfig +from .factory import TimeseriesMetrics + + +CURR_DIR = os.path.dirname(__file__) + + +@pytest.fixture +def timeseries_duckdb(): + # initial duckdb with some basic timeseries metrics + fixture = MetricsDBFixture.create() + fixture.populate_artifacts_and_projects( + projects={ + "p_0": ["service_0", "service_1", "service_2", "repo_0", "repo_1"], + "p_1": ["service_3", "service_4", "repo_2", "repo_3"], + "p_2": ["service_5", "repo_5"], + }, + collections={ + "c_0": ["p_0"], + "c_1": ["p_1", "p_2"], + "c_3": ["p_0", "p_1", "p_2"], + }, + ) + + fixture.generate_daily_events( + "2023-01-01", "2024-12-31", "VISIT", "user_0", "service_0" + ) + fixture.generate_daily_events( + "2023-01-01", "2024-12-31", "VISIT", "user_0", "service_1" + ) + fixture.generate_daily_events( + "2023-01-01", "2024-12-31", "VISIT", "user_1", "service_1" + ) + fixture.generate_daily_events( + "2023-01-01", "2024-12-31", "VISIT", "user_2", "service_2" + ) + + for ft_dev_index in range(5): + dev_name = f"ft_dev_{ft_dev_index}" + fixture.generate_daily_events( + "2023-01-01", "2024-12-31", "COMMIT_CODE", dev_name, "repo_0" + ) + + # Change in developers + for ft_dev_index in range(5, 10): + dev_name = f"ft_dev_{ft_dev_index}" + fixture.generate_daily_events( + "2023-01-01", + "2024-12-31", + "COMMIT_CODE", + dev_name, + "repo_0", + date_filter=lambda df: as_pandas_df(df[df["bucket_day"].dt.day <= 7]), + ) + + for ft_dev_index in range(5): + dev_name = f"ft_dev_{ft_dev_index}" + fixture.generate_daily_events( + "2023-01-01", "2024-12-31", "COMMIT_CODE", dev_name, "repo_1" + ) + + # User that commits only once a month + for pt_dev_index in range(10): + dev_name = f"pt_dev_{pt_dev_index}" + fixture.generate_daily_events( + "2023-01-01", + "2024-12-31", + "COMMIT_CODE", + dev_name, + "repo_0", + date_filter=lambda df: as_pandas_df(df[df["bucket_day"].dt.day % 5 == 0]), + ) + yield fixture + fixture._conn.close() + + +@pytest.fixture +def timeseries_metrics_to_test(): + return TimeseriesMetrics.from_raw_options( + start="2024-01-01", + model_prefix="timeseries", + metric_queries={ + "visits": MetricQueryDef( + ref="visits.sql", + time_aggregations=["daily", "weekly", "monthly"], + ), + "developer_active_days": MetricQueryDef( + ref="active_days.sql", + vars={ + "activity_event_types": ["COMMIT_CODE"], + }, + rolling=RollingConfig( + windows=[7, 14], + unit="day", + cron="@daily", # This determines how often this is calculated + ), + # entity_types=["artifact", "project", 
"collection"], + entity_types=["artifact", "project", "collection"], + is_intermediate=True, + ), + "developer_classifications": MetricQueryDef( + ref="developer_activity_classification.sql", + vars={ + "full_time_ratio": 10 / 30, + }, + rolling=RollingConfig( + windows=[7, 14], + unit="day", + cron="@daily", + ), + entity_types=["artifact", "project", "collection"], + ), + "change_in_7_day_developer_activity": MetricQueryDef( + ref="change_in_developers.sql", + vars={ + "comparison_interval": 7, + }, + rolling=RollingConfig( + windows=[2], + unit="period", + cron="@daily", + ), + entity_types=["artifact", "project", "collection"], + ), + }, + default_dialect="clickhouse", + queries_dir=os.path.join(CURR_DIR, "fixtures/metrics"), + timeseries_sources=["events_daily_to_artifact"], + ) + + +def test_timeseries_metric_rendering(timeseries_metrics_to_test: TimeseriesMetrics): + queries = timeseries_metrics_to_test.generate_queries() + for name, query_config in queries.items(): + query = query_config["rendered_query"] + print(f"Query {name}:") + print(query.sql("duckdb", pretty=True)) + + table_names = set(queries.keys()) + assert table_names == { + "visits_to_artifact_daily", + "visits_to_project_daily", + "visits_to_collection_daily", + "visits_to_artifact_weekly", + "visits_to_project_weekly", + "visits_to_collection_weekly", + "visits_to_artifact_monthly", + "visits_to_project_monthly", + "visits_to_collection_monthly", + "developer_active_days_to_artifact_over_7_day_window", + "developer_active_days_to_project_over_7_day_window", + "developer_active_days_to_collection_over_7_day_window", + "developer_active_days_to_artifact_over_14_day_window", + "developer_active_days_to_project_over_14_day_window", + "developer_active_days_to_collection_over_14_day_window", + "developer_classifications_to_artifact_over_7_day_window", + "developer_classifications_to_project_over_7_day_window", + "developer_classifications_to_collection_over_7_day_window", + "developer_classifications_to_artifact_over_14_day_window", + "developer_classifications_to_project_over_14_day_window", + "developer_classifications_to_collection_over_14_day_window", + "change_in_7_day_developer_activity_to_artifact_over_2_period_window", + "change_in_7_day_developer_activity_to_project_over_2_period_window", + "change_in_7_day_developer_activity_to_collection_over_2_period_window", + } + + +def test_runner( + timeseries_metrics_to_test: TimeseriesMetrics, timeseries_duckdb: MetricsDBFixture +): + base_locals = {"oso_source": "sources"} + connection = timeseries_duckdb._conn + + for _, query_config in timeseries_metrics_to_test.generate_ordered_queries(): + ref = query_config["ref"] + locals = query_config["vars"].copy() + locals.update(base_locals) + runner = MetricsRunner.create_duckdb_execution_context( + connection, + [query_config["rendered_query"]], + ref, + locals, + ) + runner.commit( + arrow.get("2024-01-01").datetime, + arrow.get("2024-01-16").datetime, + f"metrics.{query_config['table_name']}", + ) + + # Data assertions + with duckdb_df_context( + connection, + """ + SELECT * + FROM metrics.visits_to_artifact_daily + where metrics_sample_date = '2024-01-15' + """, + ) as df: + df = df[df["to_artifact_id"] == "service_0"] + assert df.iloc[0]["amount"] == 1 + + with duckdb_df_context( + connection, + """ + SELECT * + FROM metrics.developer_active_days_to_artifact_over_7_day_window + where metrics_sample_date = '2024-01-08' + """, + ) as df: + repo0_df = as_pandas_df(df[df["to_artifact_id"] == "repo_0"]) + dev0_df = 
as_pandas_df(repo0_df[repo0_df["from_artifact_id"] == "ft_dev_0"]) + assert dev0_df.iloc[0]["amount"] == 7 + dev5_df = as_pandas_df(repo0_df[repo0_df["from_artifact_id"] == "ft_dev_5"]) + assert dev5_df.iloc[0]["amount"] == 6 + + with duckdb_df_context( + connection, + """ + SELECT * + FROM metrics.developer_active_days_to_project_over_14_day_window + where metrics_sample_date = '2024-01-15' + """, + ) as df: + p0_df = as_pandas_df(df[df["to_project_id"] == "p_0"]) + p0_dev0_df = as_pandas_df(p0_df[p0_df["from_artifact_id"] == "ft_dev_0"]) + assert p0_dev0_df.iloc[0]["amount"] == 14 + + with duckdb_df_context( + connection, + """ + SELECT * + FROM metrics.developer_classifications_to_artifact_over_14_day_window + where metrics_sample_date = '2024-01-15' and metric = 'full_time_developers_over_14_day_window' + """, + ) as df: + df = df[df["to_artifact_id"] == "repo_0"] + assert df.iloc[0]["amount"] == 10 + + with duckdb_df_context( + connection, + """ + SELECT * + FROM metrics.developer_classifications_to_artifact_over_14_day_window + where metrics_sample_date = '2024-01-15' + """, + ) as df: + assert len(df) == 6 + df = df[df["to_artifact_id"] == "repo_0"] + assert df.iloc[0]["amount"] == 10 + + with duckdb_df_context( + connection, + """ + SELECT * + FROM metrics.change_in_7_day_developer_activity_to_artifact_over_2_period_window + where metrics_sample_date = '2024-01-15' + """, + ) as df: + assert len(df) == 6 + df = df[df["to_artifact_id"] == "repo_0"] + assert df.iloc[0]["amount"] == -5 diff --git a/warehouse/metrics_tools/intermediate.py b/warehouse/metrics_tools/intermediate.py new file mode 100644 index 000000000..fdb7c1b6f --- /dev/null +++ b/warehouse/metrics_tools/intermediate.py @@ -0,0 +1,133 @@ +import typing as t + +from metrics_tools.models import ( + create_basic_python_env, +) + +from sqlmesh import EngineAdapter +from sqlmesh.core.macros import MacroEvaluator, MacroRegistry, macro, RuntimeStage +from sqlmesh.core.dialect import parse_one, MacroVar, MacroFunc, parse +from sqlglot import exp + + +def run_macro_evaluator( + query: str | t.List[exp.Expression] | exp.Expression, + additional_macros: t.Optional[MacroRegistry] = None, + variables: t.Optional[t.Dict[str, t.Any]] = None, + runtime_stage: RuntimeStage = RuntimeStage.LOADING, + engine_adapter: t.Optional[EngineAdapter] = None, + default_catalog: t.Optional[str] = None, +): + if isinstance(query, str): + parsed = parse(query) + elif isinstance(query, exp.Expression): + parsed = [query] + else: + parsed = query + + macros = t.cast(MacroRegistry, macro.get_registry().copy()) + if additional_macros: + macros.update(additional_macros) + + env = create_basic_python_env({}, "", "", macros=macros, variables=variables) + + evaluator = MacroEvaluator( + python_env=env, + runtime_stage=runtime_stage, + default_catalog=default_catalog, + ) + + if engine_adapter: + evaluator.locals["engine_adapter"] = engine_adapter + + result: t.List[exp.Expression] = [] + for part in parsed: + transformed = evaluator.transform(part) + if not transformed: + continue + if isinstance(transformed, list): + result.extend(transformed) + else: + result.append(transformed) + return result + + +def run_intermediate_macro_evaluator( + query: str | exp.Expression, + macros: t.Optional[MacroRegistry] = None, + variables: t.Optional[t.Dict[str, t.Any]] = None, +): + macros = macros or t.cast(MacroRegistry, {}) + variables = variables or {} + + env = create_basic_python_env( + {}, + "", + "", + macros=macros, + variables=variables, + ) + evaluator = 
MacroEvaluator(python_env=env) + + if isinstance(query, str): + parsed = parse_one(query) + else: + parsed = query + + def replace_all_macro_vars(node: exp.Expression): + if not isinstance(node, MacroVar): + return node + # If the variables are set in this environment then allow them to be + # evaluated + if node.this in variables: + return node + # All others are unknown + return exp.Anonymous( + this="$$INTERMEDIATE_MACRO_VAR", + expressions=[exp.Literal(this=node.this, is_string=True)], + ) + + def replace_all_macro_funcs(node: exp.Expression): + if not isinstance(node, MacroFunc): + return node + # if this is an anonymous function then it's a macrofunc + if isinstance(node.this, exp.Anonymous): + if node.this.this in macros: + return node + else: + recursed_transform = node.this.transform(replace_all_macro_funcs) + return exp.Anonymous( + this="$$INTERMEDIATE_MACRO_FUNC", + expressions=[ + recursed_transform, + ], + ) + raise Exception("expected node.this to be an anonymous expression") + + parsed = parsed.transform(replace_all_macro_vars) + parsed = parsed.transform(replace_all_macro_funcs) + + intermediate_evaluation = evaluator.transform(parsed) + if not intermediate_evaluation: + return intermediate_evaluation + + def restore_intermediate(node: exp.Expression): + if not isinstance(node, exp.Anonymous): + return node + if not node.this.startswith("$$INTERMEDIATE"): + return node + if node.this == "$$INTERMEDIATE_MACRO_VAR": + return MacroVar(this=node.expressions[0].this) + elif node.this == "$$INTERMEDIATE_MACRO_FUNC": + # Restore all recursive expressions + recursed_transform = node.expressions[0].transform(restore_intermediate) + return MacroFunc(this=recursed_transform) + else: + raise Exception(f"Unknown anonymous intermediate reference `{node.this}`") + + if not isinstance(intermediate_evaluation, list): + intermediate_evaluation = [intermediate_evaluation] + final: t.List[exp.Expression] = [] + for int_expression in intermediate_evaluation: + final.append(int_expression.transform(restore_intermediate)) + return final diff --git a/warehouse/metrics_tools/joiner/__init__.py b/warehouse/metrics_tools/joiner/__init__.py new file mode 100644 index 000000000..57897766e --- /dev/null +++ b/warehouse/metrics_tools/joiner/__init__.py @@ -0,0 +1,125 @@ +import typing as t + +from sqlglot import exp +from sqlmesh.core.dialect import MacroVar + +from metrics_tools.transformer import SQLTransformer, Transform + + +class JoinerTransform(Transform): + def __init__(self, entity_type: str): + self._entity_type = entity_type + + def __call__(self, query: t.List[exp.Expression]) -> t.List[exp.Expression]: + entity_type = self._entity_type + if entity_type == "artifact": + return query + + def _transform(node: exp.Expression): + if not isinstance(node, exp.Select): + return node + select = node + + # Check if this using the timeseries source tables as a join or the from + is_using_timeseries_source = False + for table in select.find_all(exp.Table): + if table.this.this in ["events_daily_to_artifact"]: + is_using_timeseries_source = True + if not is_using_timeseries_source: + return node + + for i in range(len(select.expressions)): + ex = select.expressions[i] + if not isinstance(ex, exp.Alias): + continue + + # If to_artifact_id is being aggregated then it's time to rewrite + if isinstance(ex.this, exp.Column) and isinstance( + ex.this.this, exp.Identifier + ): + if ex.this.this.this == "to_artifact_id": + updated_select = select.copy() + current_from = t.cast(exp.From, 
updated_select.args.get("from")) + assert isinstance(current_from.this, exp.Table) + current_table = current_from.this + current_alias = current_table.alias + + # Add a join to this select + updated_select = updated_select.join( + exp.Table( + this=exp.to_identifier("artifacts_by_project_v1"), + db=MacroVar(this="oso_source"), + ), + on=f"{current_alias}.to_artifact_id = artifacts_by_project_v1.artifact_id", + join_type="inner", + ) + + new_to_entity_id_col = exp.to_column( + "artifacts_by_project_v1.project_id", quoted=True + ) + new_to_entity_alias = exp.to_identifier( + "to_project_id", quoted=True + ) + + if entity_type == "collection": + updated_select = updated_select.join( + exp.Table( + this=exp.to_identifier("projects_by_collection_v1"), + db=MacroVar(this="oso_source"), + ), + on="artifacts_by_project_v1.project_id = projects_by_collection_v1.project_id", + join_type="inner", + ) + + new_to_entity_id_col = exp.to_column( + "projects_by_collection_v1.collection_id", quoted=True + ) + new_to_entity_alias = exp.to_identifier( + "to_collection_id", quoted=True + ) + + # replace the select and the grouping with the project id in the joined table + to_artifact_id_col_sel = t.cast( + exp.Alias, updated_select.expressions[i] + ) + current_to_artifact_id_col = t.cast( + exp.Column, to_artifact_id_col_sel.this + ) + + to_artifact_id_col_sel.replace( + exp.alias_( + new_to_entity_id_col, + alias=new_to_entity_alias, + ) + ) + + group = t.cast(exp.Group, updated_select.args.get("group")) + for group_idx in range(len(group.expressions)): + group_col = t.cast(exp.Column, group.expressions[group_idx]) + if group_col == current_to_artifact_id_col: + group_col.replace(new_to_entity_id_col) + + return updated_select + # If nothing happens in the for loop then we didn't find the kind of + # expected select statement + return node + + return list(map(lambda expression: expression.transform(_transform), query)) + + +def joiner_transform( + query: str, + entity_type: str, + rolling_window: t.Optional[int] = None, + rolling_unit: t.Optional[str] = None, + time_aggregation: t.Optional[str] = None, +): + if entity_type == "artifact": + return SQLTransformer(transforms=[]).transform(query) + transformer = SQLTransformer( + transforms=[ + # Semantic transform + JoinerTransform(entity_type) + ] + ) + return transformer.transform(query) diff --git a/warehouse/metrics_tools/joiner/fixtures/basic/expected_artifact.sql b/warehouse/metrics_tools/joiner/fixtures/basic/expected_artifact.sql new file mode 100644 index 000000000..13129660f --- /dev/null +++ b/warehouse/metrics_tools/joiner/fixtures/basic/expected_artifact.sql @@ -0,0 +1,4 @@ +select events.to_artifact_id, + SUM(events.amount) as amount +from metrics.events_daily_to_artifact as events +group by events.to_artifact_id \ No newline at end of file diff --git a/warehouse/metrics_tools/joiner/fixtures/basic/expected_project.sql b/warehouse/metrics_tools/joiner/fixtures/basic/expected_project.sql new file mode 100644 index 000000000..2d48e2d66 --- /dev/null +++ b/warehouse/metrics_tools/joiner/fixtures/basic/expected_project.sql @@ -0,0 +1,5 @@ +select artifacts_by_project_v1.project_id as to_project_id, + SUM(events.amount) as amount +from metrics.events_daily_to_artifact as events + inner join @oso_source.artifacts_by_project_v1 on events.to_artifact_id = artifacts_by_project_v1.artifact_id +group by artifacts_by_project_v1.project_id \ No newline at end of file diff --git a/warehouse/metrics_tools/joiner/fixtures/basic/input.sql 
b/warehouse/metrics_tools/joiner/fixtures/basic/input.sql new file mode 100644 index 000000000..13129660f --- /dev/null +++ b/warehouse/metrics_tools/joiner/fixtures/basic/input.sql @@ -0,0 +1,4 @@ +select events.to_artifact_id, + SUM(events.amount) as amount +from metrics.events_daily_to_artifact as events +group by events.to_artifact_id \ No newline at end of file diff --git a/warehouse/metrics_tools/joiner/test_joiner.py b/warehouse/metrics_tools/joiner/test_joiner.py new file mode 100644 index 000000000..011a5a057 --- /dev/null +++ b/warehouse/metrics_tools/joiner/test_joiner.py @@ -0,0 +1,24 @@ +import os +from . import joiner_transform +from metrics_tools.utils import assert_same_sql + + +CURR_DIR = os.path.dirname(__file__) +FIXTURES_DIR = os.path.abspath(os.path.join(CURR_DIR, "fixtures")) + + +def get_sql_fixture(sql_path: str) -> str: + return open(os.path.join(FIXTURES_DIR, sql_path), "r").read() + + +def test_factory(): + input = get_sql_fixture("basic/input.sql") + artifact = joiner_transform(input, "artifact") + assert artifact is not None + assert len(artifact) == 1 + assert_same_sql(artifact[0], get_sql_fixture("basic/expected_artifact.sql")) + + project = joiner_transform(input, "project") + assert project is not None + assert len(project) == 1 + assert_same_sql(project[0], get_sql_fixture("basic/expected_project.sql")) diff --git a/warehouse/metrics_tools/lib/__init__.py b/warehouse/metrics_tools/lib/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/warehouse/metrics_tools/lib/factories/factory.py b/warehouse/metrics_tools/lib/factories/factory.py deleted file mode 100644 index 93a0fbdb5..000000000 --- a/warehouse/metrics_tools/lib/factories/factory.py +++ /dev/null @@ -1,293 +0,0 @@ -import inspect -import os -import typing as t - -from metrics_tools.lib.factories.definition import ( - GeneratedArtifactConfig, - MetricQuery, - TimeseriesMetricsOptions, - generated_entity, - join_all_of_entity_type, - reference_to_str, -) -from metrics_tools.models import GeneratedModel -from sqlglot import exp -from sqlmesh.core.model import ModelKindName -from sqlmesh.utils.date import TimeLike - -from .macros import ( - metrics_end, - metrics_entity_type_col, - metrics_name, - metrics_sample_date, - metrics_start, - relative_window_sample_date, - metrics_entity_type_alias, -) - -CURR_DIR = os.path.dirname(__file__) -QUERIES_DIR = os.path.abspath(os.path.join(CURR_DIR, "../../oso_metrics")) - -type ExtraVarBaseType = str | int | float -type ExtraVarType = ExtraVarBaseType | t.List[ExtraVarBaseType] - - -TIME_AGGREGATION_TO_CRON = { - "daily": "@daily", - "monthly": "@monthly", - "weekly": "@weekly", -} -METRICS_COLUMNS_BY_ENTITY: t.Dict[str, t.Dict[str, exp.DataType]] = { - "artifact": { - "metrics_sample_date": exp.DataType.build("DATE", dialect="clickhouse"), - "event_source": exp.DataType.build("String", dialect="clickhouse"), - "to_artifact_id": exp.DataType.build("String", dialect="clickhouse"), - "from_artifact_id": exp.DataType.build("String", dialect="clickhouse"), - "metric": exp.DataType.build("String", dialect="clickhouse"), - "amount": exp.DataType.build("Float64", dialect="clickhouse"), - }, - "project": { - "metrics_sample_date": exp.DataType.build("DATE", dialect="clickhouse"), - "event_source": exp.DataType.build("String", dialect="clickhouse"), - "to_project_id": exp.DataType.build("String", dialect="clickhouse"), - "from_artifact_id": exp.DataType.build("String", dialect="clickhouse"), - "metric": exp.DataType.build("String", 
dialect="clickhouse"), - "amount": exp.DataType.build("Float64", dialect="clickhouse"), - }, - "collection": { - "metrics_sample_date": exp.DataType.build("DATE", dialect="clickhouse"), - "event_source": exp.DataType.build("String", dialect="clickhouse"), - "to_collection_id": exp.DataType.build("String", dialect="clickhouse"), - "from_artifact_id": exp.DataType.build("String", dialect="clickhouse"), - "metric": exp.DataType.build("String", dialect="clickhouse"), - "amount": exp.DataType.build("Float64", dialect="clickhouse"), - }, -} - - -def generate_models_from_query( - calling_file: str, - query: MetricQuery, - default_dialect: str, - peer_table_map: t.Dict[str, str], - start: TimeLike, - timeseries_sources: t.List[str], -): - # Turn the source into a dict so it can be used in the sqlmesh context - query_def_as_input = query._source.to_input() - query_reference_name = query.reference_name - refs = query.provided_dependency_refs - - all_tables: t.Dict[str, t.List[str]] = { - "artifact": [], - "project": [], - "collection": [], - } - - for ref in refs: - cron = "@daily" - time_aggregation = ref.get("time_aggregation") - window = ref.get("window") - if time_aggregation: - cron = TIME_AGGREGATION_TO_CRON[time_aggregation] - else: - if not window: - raise Exception("window or time_aggregation must be set") - assert query._source.rolling - cron = query._source.rolling["cron"] - - # Clean up the peer_table_map (this is a hack to prevent unnecessary - # runs when the metrics factory is updated) - query_dependencies = query.dependencies(ref, peer_table_map) - # So much of this needs to be refactored but for now this is to ensure - # that in some way that the dict doesn't randomly "change". I don't - # think this will be consistent between python machines but let's see - # for now. - reduced_peer_table_tuples = [(k, peer_table_map[k]) for k in query_dependencies] - reduced_peer_table_tuples.sort() - - config = GeneratedArtifactConfig( - query_reference_name=query_reference_name, - query_def_as_input=query_def_as_input, - default_dialect=default_dialect, - peer_table_tuples=reduced_peer_table_tuples, - ref=ref, - timeseries_sources=timeseries_sources, - ) - - table_name = query.table_name(ref) - all_tables[ref["entity_type"]].append(table_name) - columns = METRICS_COLUMNS_BY_ENTITY[ref["entity_type"]] - additional_macros = [ - metrics_entity_type_col, - metrics_entity_type_alias, - relative_window_sample_date, - (metrics_name, ["metric_name"]), - metrics_sample_date, - metrics_end, - metrics_start, - ] - - kind_common = {"batch_size": 1} - partitioned_by = ("day(metrics_sample_date)",) - - # Due to how the schedulers work for sqlmesh we actually can't batch if - # we're using a weekly cron for a time aggregation. In order to have - # this work we just adjust the start/end time for the - # metrics_start/metrics_end and also give a large enough batch time to - # fit a few weeks. 
This ensures there's on missing data - if time_aggregation == "weekly": - kind_common = {"batch_size": 182, "lookback": 7} - if time_aggregation == "monthly": - kind_common = {"batch_size": 6} - partitioned_by = ("month(metrics_sample_date)",) - if time_aggregation == "daily": - kind_common = {"batch_size": 180} - - if ref["entity_type"] == "artifact": - GeneratedModel.create( - func=generated_entity, - source=query._source.raw_sql, - entrypoint_path=calling_file, - config=config, - name=f"metrics.{table_name}", - kind={ - "name": ModelKindName.INCREMENTAL_BY_TIME_RANGE, - "time_column": "metrics_sample_date", - **kind_common, - }, - dialect="clickhouse", - columns=columns, - grain=[ - "metric", - "to_artifact_id", - "from_artifact_id", - "metrics_sample_date", - ], - cron=cron, - start=start, - additional_macros=additional_macros, - partitioned_by=partitioned_by, - ) - - if ref["entity_type"] == "project": - GeneratedModel.create( - func=generated_entity, - source=query._source.raw_sql, - entrypoint_path=calling_file, - config=config, - name=f"metrics.{table_name}", - kind={ - "name": ModelKindName.INCREMENTAL_BY_TIME_RANGE, - "time_column": "metrics_sample_date", - **kind_common, - }, - dialect="clickhouse", - columns=columns, - grain=[ - "metric", - "to_project_id", - "from_artifact_id", - "metrics_sample_date", - ], - cron=cron, - start=start, - additional_macros=additional_macros, - partitioned_by=partitioned_by, - ) - if ref["entity_type"] == "collection": - GeneratedModel.create( - func=generated_entity, - source=query._source.raw_sql, - entrypoint_path=calling_file, - config=config, - name=f"metrics.{table_name}", - kind={ - "name": ModelKindName.INCREMENTAL_BY_TIME_RANGE, - "time_column": "metrics_sample_date", - **kind_common, - }, - dialect="clickhouse", - columns=columns, - grain=[ - "metric", - "to_collection_id", - "from_artifact_id", - "metrics_sample_date", - ], - cron=cron, - start=start, - additional_macros=additional_macros, - partitioned_by=partitioned_by, - ) - - return all_tables - - -def timeseries_metrics( - **raw_options: t.Unpack[TimeseriesMetricsOptions], -): - calling_file = inspect.stack()[1].filename - timeseries_sources = raw_options.get( - "timeseries_sources", ["events_daily_to_artifact"] - ) - assert timeseries_sources is not None - - metrics_queries = [ - MetricQuery.load( - name=name, - default_dialect=raw_options.get("default_dialect", "clickhouse"), - source=query_def, - ) - for name, query_def in raw_options["metric_queries"].items() - ] - - # Build the dependency graph of all the metrics queries - peer_table_map: t.Dict[str, str] = {} - for query in metrics_queries: - provided_refs = query.provided_dependency_refs - for ref in provided_refs: - peer_table_map[reference_to_str(ref)] = query.table_name(ref) - - all_tables: t.Dict[str, t.List[str]] = { - "artifact": [], - "project": [], - "collection": [], - } - - # Generate the models - for query in metrics_queries: - tables = generate_models_from_query( - calling_file, - query, - default_dialect=raw_options.get("default_dialect", "clickhouse"), - peer_table_map=peer_table_map, - start=raw_options["start"], - timeseries_sources=timeseries_sources, - ) - if not query.is_intermediate: - for entity_type in all_tables.keys(): - all_tables[entity_type] = all_tables[entity_type] + tables[entity_type] - - # Join all of the models of the same entity type into the same view model - for entity_type, tables in all_tables.items(): - GeneratedModel.create( - func=join_all_of_entity_type, - 
entrypoint_path=calling_file, - config={ - "db": "metrics", - "tables": tables, - "columns": list(METRICS_COLUMNS_BY_ENTITY[entity_type].keys()), - }, - name=f"metrics.timeseries_metrics_to_{entity_type}", - kind="VIEW", - dialect="clickhouse", - start=raw_options["start"], - columns={ - k: METRICS_COLUMNS_BY_ENTITY[entity_type][k] - for k in filter( - lambda col: col not in ["event_source"], - METRICS_COLUMNS_BY_ENTITY[entity_type].keys(), - ) - }, - ) diff --git a/warehouse/metrics_tools/lib/local/__init__.py b/warehouse/metrics_tools/local/__init__.py similarity index 100% rename from warehouse/metrics_tools/lib/local/__init__.py rename to warehouse/metrics_tools/local/__init__.py diff --git a/warehouse/metrics_tools/lib/local/utils.py b/warehouse/metrics_tools/local/utils.py similarity index 100% rename from warehouse/metrics_tools/lib/local/utils.py rename to warehouse/metrics_tools/local/utils.py diff --git a/warehouse/metrics_tools/models.py b/warehouse/metrics_tools/models.py index 203943fd0..65f889468 100644 --- a/warehouse/metrics_tools/models.py +++ b/warehouse/metrics_tools/models.py @@ -4,6 +4,7 @@ import textwrap import typing as t from pathlib import Path +import uuid from sqlglot import exp from sqlmesh.core import constants as c @@ -17,6 +18,7 @@ ExecutableKind, build_env, serialize_env, + normalize_source, ) logger = logging.getLogger(__name__) @@ -111,9 +113,9 @@ def model( macros = t.cast(MacroRegistry, macros.copy()) for additional_macro in self.additional_macros: if isinstance(additional_macro, tuple): - macros.update(create_unregistered_macro(*additional_macro)) + macros.update(create_unregistered_wrapped_macro(*additional_macro)) else: - macros.update(create_unregistered_macro(additional_macro)) + macros.update(create_unregistered_wrapped_macro(additional_macro)) common_kwargs: t.Dict[str, t.Any] = dict( defaults=defaults, @@ -166,8 +168,58 @@ def escape_triple_quotes(input_string: str) -> str: return escaped_string +def create_unregistered_macro_registry( + macros: t.List[t.Callable | t.Tuple[t.Callable, t.List[str]]] +): + registry = MacroRegistry(f"macro_registry_{uuid.uuid4().hex}") + for additional_macro in macros: + if isinstance(additional_macro, tuple): + registry.update(create_unregistered_wrapped_macro(*additional_macro)) + else: + registry.update(create_unregistered_wrapped_macro(additional_macro)) + return registry + + def create_unregistered_macro( - func: t.Callable, aliases: t.Optional[t.List[str]] = None + func: t.Callable, + aliases: t.Optional[t.List[str]] = None, +): + aliases = aliases or [] + name = func.__name__ + source = normalize_source(func) + registry: t.Dict[str, ExecutableOrMacro] = { + name: Executable( + name=name, + payload=source, + kind=ExecutableKind.DEFINITION, + path=f"/__generated/macro/{name}.py", + alias=None, + is_metadata=False, + ), + } + + for alias_name in aliases: + alias_as_str = textwrap.dedent( + f""" + def {alias_name}(evaluator, *args, **kwargs): + return {name}(evaluator, *args, **kwargs) + """ + ) + registry[alias_name] = Executable( + name=alias_name, + payload=alias_as_str, + kind=ExecutableKind.DEFINITION, + path=f"/__generated/macro/{name}.py", + alias=None, + is_metadata=False, + ) + + return registry + + +def create_unregistered_wrapped_macro( + func: t.Callable, + aliases: t.Optional[t.List[str]] = None, ) -> t.Dict[str, ExecutableOrMacro]: aliases = aliases or [] entrypoint_name = func.__name__ @@ -208,19 +260,18 @@ def {alias_name}(evaluator, *args, **kwargs): return registry -def create_import_call_env( 
- name: str, - import_module: str, - config: t.Mapping[str, t.Any], - source: str, +def create_basic_python_env( env: t.Dict[str, t.Any], - path: Path, - project_path: Path, + path: str | Path = "", + project_path: str | Path = "", macros: t.Optional[MacroRegistry] = None, - entrypoint_name: str = "macro_entrypoint", additional_macros: t.Optional[MacroRegistry] = None, variables: t.Optional[t.Dict[str, t.Any]] = None, ): + if isinstance(path, str): + path = Path(path) + if isinstance(project_path, str): + project_path = Path(project_path) serialized = env.copy() macros = macros or macro.get_registry() @@ -229,6 +280,50 @@ def create_import_call_env( macros = t.cast(MacroRegistry, macros.copy()) macros.update(additional_macros) + python_env = {} + for name, used_macro in macros.items(): + if isinstance(used_macro, Executable): + serialized[name] = used_macro + elif not hasattr(used_macro, c.SQLMESH_BUILTIN): + build_env(used_macro.func, env=python_env, name=name, path=path) + + if variables: + for name, value in variables.items(): + serialized[name] = Executable.value(value) + + serialized.update(serialize_env(python_env, project_path)) + + return serialized + + +def create_import_call_env( + name: str, + import_module: str, + config: t.Mapping[str, t.Any], + source: str, + env: t.Dict[str, t.Any], + path: str | Path, + project_path: str | Path, + macros: t.Optional[MacroRegistry] = None, + entrypoint_name: str = "macro_entrypoint", + additional_macros: t.Optional[MacroRegistry] = None, + variables: t.Optional[t.Dict[str, t.Any]] = None, +): + if isinstance(path, str): + path = Path(path) + + if isinstance(project_path, str): + project_path = Path(project_path) + + serialized = create_basic_python_env( + env, + path, + project_path, + macros=macros, + variables=variables, + additional_macros=additional_macros, + ) + entrypoint_as_str = textwrap.dedent( f""" def {entrypoint_name}(evaluator): @@ -256,17 +351,4 @@ def {entrypoint_name}(evaluator): kind=ExecutableKind.IMPORT, ) - python_env = {} - for name, used_macro in macros.items(): - if isinstance(used_macro, Executable): - serialized[name] = used_macro - elif not hasattr(used_macro, c.SQLMESH_BUILTIN): - build_env(used_macro.func, env=python_env, name=name, path=path) - - if variables: - for name, value in variables.items(): - serialized[name] = Executable.value(value) - - serialized.update(serialize_env(python_env, project_path)) - return (entrypoint_name, serialized) diff --git a/warehouse/metrics_tools/runner.py b/warehouse/metrics_tools/runner.py new file mode 100644 index 000000000..161e004aa --- /dev/null +++ b/warehouse/metrics_tools/runner.py @@ -0,0 +1,188 @@ +"""Run metrics queries for a given boundary""" + +import duckdb +import arrow +import logging +from sqlmesh.core.context import ExecutionContext +from sqlmesh.core.config import DuckDBConnectionConfig +from sqlmesh.core.engine_adapter.duckdb import DuckDBEngineAdapter +from sqlmesh.core.macros import RuntimeStage + +from metrics_tools.definition import PeerMetricDependencyRef +from metrics_tools.intermediate import run_macro_evaluator +from metrics_tools.factory.macros import ( + metrics_end, + metrics_sample_date, + metrics_start, +) +from metrics_tools.models import create_unregistered_macro_registry +import pandas as pd +import abc + +from datetime import datetime +import typing as t + +from sqlglot import exp + +logger = logging.getLogger(__name__) + + +def generate_duckdb_create_table(df: pd.DataFrame, table_name: str) -> str: + # Map Pandas dtypes to DuckDB types + 
dtype_mapping = { + "int64": "BIGINT", + "int32": "INTEGER", + "float64": "DOUBLE", + "float32": "FLOAT", + "bool": "BOOLEAN", + "object": "TEXT", + "datetime64[ns]": "TIMESTAMP", + "datetime64[us]": "TIMESTAMP", + "timedelta64[ns]": "INTERVAL", + } + + # Start the CREATE TABLE statement + create_statement = f"CREATE TABLE IF NOT EXISTS {table_name} (\n" + + # Generate column definitions + column_definitions = [] + for col in df.columns: + col_type = dtype_mapping.get( + str(df[col].dtype), "TEXT" + ) # Default to TEXT for unknown types + column_definitions.append(f" {col} {col_type}") + + # Join the column definitions and finish the statement + create_statement += ",\n".join(column_definitions) + create_statement += "\n);" + + return create_statement + + +class RunnerEngine(abc.ABC): + def execute_df(self, query: str) -> pd.DataFrame: + raise NotImplementedError("execute_df not implemented") + + def execute(self, query: str): + raise NotImplementedError("execute_df not implemented") + + +class ExistingDuckDBConnectionConfig(DuckDBConnectionConfig): + def __init__(self, conn: duckdb.DuckDBPyConnection, *args, **kwargs): + self._existing_connection = conn + super().__init__(*args, **kwargs) + + @property + def _connection_factory(self) -> t.Callable: + return lambda: self._existing_connection + + +class MetricsRunner: + @classmethod + def create_duckdb_execution_context( + cls, + conn: duckdb.DuckDBPyConnection, + query: t.List[exp.Expression], + ref: PeerMetricDependencyRef, + locals: t.Optional[t.Dict[str, t.Any]], + ): + def connection_factory(): + return conn + + engine_adapter = DuckDBEngineAdapter(connection_factory) + context = ExecutionContext(engine_adapter, {}) + return cls(context, query, ref, locals) + + def __init__( + self, + context: ExecutionContext, + query: t.List[exp.Expression], + ref: PeerMetricDependencyRef, + locals: t.Optional[t.Dict[str, t.Any]] = None, + ): + self._context = context + self._query = query + self._ref = ref + self._locals = locals or {} + + def run(self, start: datetime, end: datetime): + """Run metrics for a given period and return the results as pandas dataframes""" + if self._ref.get("time_aggregation"): + return self.run_time_aggregation(start, end) + else: + return self.run_rolling(start, end) + + def run_time_aggregation(self, start: datetime, end: datetime): + rendered_query = self.render_query(start, end) + logger.debug("executing time aggregation", extra={"query": rendered_query}) + return self._context.engine_adapter.fetchdf(rendered_query) + + def run_rolling(self, start: datetime, end: datetime): + df: pd.DataFrame = pd.DataFrame() + logger.debug(f"run_rolling called with start={start} and end={end}") + for day in arrow.Arrow.range("day", arrow.get(start), arrow.get(end)): + rendered_query = self.render_query(day.datetime, day.datetime) + logger.debug( + f"executing rolling window: {rendered_query}", + extra={"query": rendered_query}, + ) + day_result = self._context.engine_adapter.fetchdf(rendered_query) + df = pd.concat([df, day_result]) + return df + + def render_query(self, start: datetime, end: datetime) -> str: + variables: t.Dict[str, t.Any] = { + "start_ds": start.strftime("%Y-%m-%d"), + "end_ds": end.strftime("%Y-%m-%d"), + } + logger.debug(f"start_ds={variables['start_ds']} end_ds={variables['end_ds']}") + time_aggregation = self._ref.get("time_aggregation") + rolling_window = self._ref.get("window") + rolling_unit = self._ref.get("unit") + if time_aggregation: + variables["time_aggregation"] = time_aggregation + if 
rolling_window and rolling_unit: + variables["rolling_window"] = rolling_window + variables["rolling_unit"] = rolling_unit + variables.update(self._locals) + additional_macros = create_unregistered_macro_registry( + [ + metrics_end, + metrics_start, + metrics_sample_date, + ] + ) + evaluated_query = run_macro_evaluator( + self._query, + additional_macros=additional_macros, + variables=variables, + engine_adapter=self._context.engine_adapter, + runtime_stage=RuntimeStage.EVALUATING, + ) + rendered_parts = list( + map( + lambda a: a.sql(dialect=self._context.engine_adapter.dialect), + evaluated_query, + ) + ) + return "\n".join(rendered_parts) + + def commit(self, start: datetime, end: datetime, destination: str): + """Like run but commits the result to the database""" + try: + result = self.run(start, end) + except: + logger.error( + "Running query failed", + extra={"query": self._query[0].sql(dialect="duckdb", pretty=True)}, + ) + raise + + create_table = generate_duckdb_create_table(result, destination) + + logger.debug("creating duckdb table") + self._context.engine_adapter.execute(create_table) + + logger.debug("inserting results from the run") + self._context.engine_adapter.insert_append(destination, result) + return result diff --git a/warehouse/metrics_tools/test_intermediate.py b/warehouse/metrics_tools/test_intermediate.py new file mode 100644 index 000000000..0afecaf73 --- /dev/null +++ b/warehouse/metrics_tools/test_intermediate.py @@ -0,0 +1,114 @@ +import typing as t +import pytest + +from sqlmesh.core.macros import MacroEvaluator, MacroRegistry +from sqlmesh.core.dialect import parse_one +from sqlglot import exp +from sqlglot.optimizer.qualify import qualify + +from .models import create_unregistered_macro +from .intermediate import run_intermediate_macro_evaluator, run_macro_evaluator + + +@pytest.fixture +def macro_fixtures(): + def concat_macro(evaluator: MacroEvaluator, *args): + return exp.Concat( + expressions=args, + safe=False, + coalesce=False, + ) + + def get_state(evaluator: MacroEvaluator, key: str | exp.Expression): + if isinstance(key, exp.Literal): + key = key.this + return exp.Literal(this=evaluator.locals["$$test_state"][key], is_string=True) + + registry = create_unregistered_macro(concat_macro) + registry.update(create_unregistered_macro(get_state)) + return registry + + +@pytest.mark.parametrize( + "input,expected,variables", + [ + ( + "select @concat_macro('a', 'b', 'c') from test", + "select CONCAT('a', 'b', 'c') from test", + {}, + ), + ( + "select @unknown_macro('a', 'b', 'c') from test", + "select @unknown_macro('a', 'b', 'c') from test", + {}, + ), + ( + "select @concat_macro(@var1, 'b', 'c') from test", + "select CONCAT('alpha', 'b', 'c') from test", + {"var1": "alpha"}, + ), + ( + "select @unknown_macro(@var1, 'b', 'c') from test", + "select @unknown_macro('alpha', 'b', 'c') from test", + {"var1": "alpha"}, + ), + ( + "select @unknown_macro(@var1, 'b', @concat_macro(@somevar, @var2)) from test", + "select @unknown_macro('alpha', 'b', CONCAT(@somevar, '2')) from test", + {"var1": "alpha", "var2": "2"}, + ), + ( + "select @get_state('foo'), @get_state('baz') from test", + "select 'bar', 'bop' from test", + {"$$test_state": {"foo": "bar", "baz": "bop"}}, + ), + ], +) +def test_intermediate_macro_evaluator( + macro_fixtures: MacroRegistry, + input: str, + expected: str, + variables: t.Dict[str, t.Any], +): + evaluated = run_intermediate_macro_evaluator( + input, macro_fixtures, variables=variables + ) + assert evaluated is not None + assert 
len(evaluated) == 1 + assert qualify(evaluated[0]) == qualify(parse_one(expected)) + + +@pytest.mark.parametrize( + "input,expected,variables", + [ + ( + "select @concat_macro('a', 'b', 'c') from test", + "select CONCAT('a', 'b', 'c') from test", + {}, + ), + ( + "select @concat_macro('a', 'b', 'c') from test @WHERE(FALSE) 1 > 2", + "select CONCAT('a', 'b', 'c') from test", + {}, + ), + ], +) +def test_macro_evaluator( + macro_fixtures: MacroRegistry, + input: str, + expected: str, + variables: t.Dict[str, t.Any], +): + evaluated = run_macro_evaluator(input, macro_fixtures, variables=variables) + assert evaluated is not None + assert len(evaluated) == 1 + assert qualify(evaluated[0]) == qualify(parse_one(expected)) + + +def test_macro_evaluator_fails(): + failed = False + try: + run_macro_evaluator("select @hi from table") + except Exception: + failed = True + assert failed diff --git a/warehouse/metrics_tools/transformer/__init__.py b/warehouse/metrics_tools/transformer/__init__.py new file mode 100644 index 000000000..a5b1f50ff --- /dev/null +++ b/warehouse/metrics_tools/transformer/__init__.py @@ -0,0 +1,13 @@ +# ruff: noqa: F403 +"""Tools for sql model transformation. + +This allows for a fairly generic process to generate models for sqlmesh that +will be compatible with the environment that sqlmesh stores in it's state by +applying transformations that might need to access some application state/config +_before_ being stored in the sqlmesh state. +""" + +from .base import * +from .intermediate import * +from .transformer import * +from .qualify import * diff --git a/warehouse/metrics_tools/transformer/base.py b/warehouse/metrics_tools/transformer/base.py new file mode 100644 index 000000000..f4b25c0d9 --- /dev/null +++ b/warehouse/metrics_tools/transformer/base.py @@ -0,0 +1,9 @@ +import abc +import typing as t + +from sqlglot import exp + + +class Transform(abc.ABC): + def __call__(self, query: t.List[exp.Expression]) -> t.List[exp.Expression]: + raise NotImplementedError("transformation not implemented") diff --git a/warehouse/metrics_tools/transformer/intermediate.py b/warehouse/metrics_tools/transformer/intermediate.py new file mode 100644 index 000000000..82cf98400 --- /dev/null +++ b/warehouse/metrics_tools/transformer/intermediate.py @@ -0,0 +1,35 @@ +import typing as t + +from metrics_tools.intermediate import run_intermediate_macro_evaluator +from metrics_tools.models import create_unregistered_wrapped_macro +from sqlmesh.core.macros import MacroRegistry +from sqlglot import exp + +from .base import Transform + + +class IntermediateMacroEvaluatorTransform(Transform): + def __init__( + self, + macros: t.List[t.Callable | t.Tuple[t.Callable, t.List[str]]], + variables: t.Dict[str, t.Any], + ): + self._macros = macros + self._variables = variables + + def __call__(self, query: t.List[exp.Expression]) -> t.List[exp.Expression]: + registry = MacroRegistry("intermediate_macros") + for macro in self._macros: + if isinstance(macro, tuple): + registry.update(create_unregistered_wrapped_macro(*macro)) + else: + registry.update(create_unregistered_wrapped_macro(macro)) + + final = [] + for expression in query: + evaluated = run_intermediate_macro_evaluator( + expression, macros=registry, variables=self._variables + ) + assert evaluated is not None + final.extend(evaluated) + return final diff --git a/warehouse/metrics_tools/transformer/qualify.py b/warehouse/metrics_tools/transformer/qualify.py new file mode 100644 index 000000000..000a878b7 --- /dev/null +++ 
b/warehouse/metrics_tools/transformer/qualify.py @@ -0,0 +1,13 @@ +from typing import List +from sqlglot.optimizer.qualify import qualify +from sqlglot.expressions import Expression + +from .base import Transform + + +class QualifyTransform(Transform): + def __init__(self, **options): + self._options = options + + def __call__(self, query: List[Expression]) -> List[Expression]: + return list(map(lambda q: qualify(q, **self._options), query)) diff --git a/warehouse/metrics_tools/transformer/transformer.py b/warehouse/metrics_tools/transformer/transformer.py new file mode 100644 index 000000000..0d88cf217 --- /dev/null +++ b/warehouse/metrics_tools/transformer/transformer.py @@ -0,0 +1,32 @@ +import typing as t +from dataclasses import dataclass + +from metrics_tools.transformer.base import Transform +from metrics_tools.transformer.qualify import QualifyTransform +from sqlglot import exp +from sqlmesh.core.dialect import parse + + +@dataclass(kw_only=True) +class SQLTransformer: + """The sql transformer. + + This defines a process for sql transformation. Given an ordered list of Transforms + """ + + transforms: t.List[Transform] + disable_qualify: bool = False + + def transform(self, query: str | t.List[exp.Expression]): + if isinstance(query, str): + transformed = parse(query) + else: + transformed = query + # Qualify all + # transformed = list(map(qualify, transformed)) + if not self.disable_qualify: + transformed = QualifyTransform()(transformed) + + for transform in self.transforms: + transformed = transform(transformed) + return transformed diff --git a/warehouse/metrics_tools/utils/__init__.py b/warehouse/metrics_tools/utils/__init__.py new file mode 100644 index 000000000..8713d5571 --- /dev/null +++ b/warehouse/metrics_tools/utils/__init__.py @@ -0,0 +1,4 @@ +# ruff: noqa: F403 + +from .testing import * +from .glot import * diff --git a/warehouse/metrics_tools/utils/dataframes.py b/warehouse/metrics_tools/utils/dataframes.py new file mode 100644 index 000000000..001d6da38 --- /dev/null +++ b/warehouse/metrics_tools/utils/dataframes.py @@ -0,0 +1,6 @@ +import pandas as pd +import typing as t + + +def as_pandas_df(v: t.Any) -> pd.DataFrame: + return t.cast(pd.DataFrame, v) diff --git a/warehouse/metrics_tools/utils/glot.py b/warehouse/metrics_tools/utils/glot.py new file mode 100644 index 000000000..7bb86f3d1 --- /dev/null +++ b/warehouse/metrics_tools/utils/glot.py @@ -0,0 +1,10 @@ +import typing as t + +from sqlglot import exp + + +def exp_literal_to_py_literal(glot_literal: exp.Expression) -> t.Any: + # Don't error by default let it pass + if not isinstance(glot_literal, exp.Literal): + return glot_literal + return glot_literal.this diff --git a/warehouse/metrics_tools/utils/testing.py b/warehouse/metrics_tools/utils/testing.py new file mode 100644 index 000000000..3caea2099 --- /dev/null +++ b/warehouse/metrics_tools/utils/testing.py @@ -0,0 +1,30 @@ +import contextlib +import duckdb +from sqlglot.optimizer.qualify import qualify +import sqlglot as sql +from sqlglot import exp +from sqlmesh.core.dialect import parse_one +from oso_dagster.cbt.utils.compare import is_same_sql + + +def assert_same_sql(actual: exp.Expression | str, expected: exp.Expression | str): + if isinstance(actual, str): + actual = parse_one(actual) + if isinstance(expected, str): + expected = parse_one(expected) + actual = qualify(actual) + expected = qualify(expected) + if not is_same_sql(actual, expected): + assert parse_one(actual.sql()) == parse_one(expected.sql()) + else: + print("SQL DIFF") + diff = 
sql.diff(actual, expected) + for d in diff: + print(d) + print(len(diff)) + assert is_same_sql(actual, expected) + + +@contextlib.contextmanager +def duckdb_df_context(connection: duckdb.DuckDBPyConnection, query: str): + yield connection.sql(query).df() diff --git a/warehouse/oso_dagster/cbt/utils/compare.py b/warehouse/oso_dagster/cbt/utils/compare.py index 3664d58ac..0cf50fde0 100644 --- a/warehouse/oso_dagster/cbt/utils/compare.py +++ b/warehouse/oso_dagster/cbt/utils/compare.py @@ -2,6 +2,7 @@ import sqlglot as sql from sqlglot.optimizer.qualify import qualify +from sqlglot.optimizer.normalize import normalize from sqlglot.diff import Keep from sqlglot import expressions as exp @@ -27,7 +28,10 @@ def is_same_source_table(a: exp.Table, b: exp.Table): def is_same_sql(a: exp.Expression, b: exp.Expression): - diff = sql.diff(qualify(a), qualify(b)) + diff = sql.diff( + normalize(qualify(sql.parse_one(a.sql()))), + normalize(qualify(sql.parse_one(b.sql()))), + ) for section in diff: if type(section) != Keep: return False diff --git a/warehouse/oso_dagster/cbt/utils/test_utils.py b/warehouse/oso_dagster/cbt/utils/test_utils.py index 149ea6435..0f3ebf3b4 100644 --- a/warehouse/oso_dagster/cbt/utils/test_utils.py +++ b/warehouse/oso_dagster/cbt/utils/test_utils.py @@ -1,5 +1,7 @@ import sqlglot as sql from oso_dagster.cbt.utils import replace_source_tables, is_same_sql +from sqlglot.optimizer.qualify import qualify +from sqlglot.optimizer.normalize import normalize def test_replace_table(): @@ -16,4 +18,4 @@ def test_replace_table(): expected = sql.parse_one( "select * from noreplace as nr inner join replacement as t on t.t_id = nr.nr_id" ) - assert is_same_sql(result3, expected) + assert is_same_sql(normalize(qualify(result3)), normalize(qualify(expected))) diff --git a/warehouse/oso_lets_go/cli.py b/warehouse/oso_lets_go/cli.py index 45552a3a2..e5cd3230c 100644 --- a/warehouse/oso_lets_go/cli.py +++ b/warehouse/oso_lets_go/cli.py @@ -9,7 +9,7 @@ import os import click -from metrics_tools.lib.local.utils import initialize_local_duckdb, reset_local_duckdb +from metrics_tools.local.utils import initialize_local_duckdb, reset_local_duckdb @click.group()
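
A note on the intermediate macro evaluator introduced in warehouse/metrics_tools/intermediate.py: it swaps unregistered macro calls and variables for $$INTERMEDIATE_* placeholders, lets the real sqlmesh MacroEvaluator expand everything it does know about, then restores the placeholders untouched. A minimal sketch of that round trip, modeled on the fixtures in test_intermediate.py; @runtime_only_macro is a made-up name standing in for any macro that is only registered at sqlmesh runtime:

from sqlglot import exp
from sqlglot.optimizer.qualify import qualify
from sqlmesh.core.dialect import parse_one
from sqlmesh.core.macros import MacroEvaluator

from metrics_tools.intermediate import run_intermediate_macro_evaluator
from metrics_tools.models import create_unregistered_macro


def concat_macro(evaluator: MacroEvaluator, *args):
    # Same shape as the test fixture: a macro that expands to CONCAT(...)
    return exp.Concat(expressions=args, safe=False, coalesce=False)


registry = create_unregistered_macro(concat_macro)

# @concat_macro is registered, so it is expanded now; @runtime_only_macro is
# not, so it is tunneled through a $$INTERMEDIATE_MACRO_FUNC placeholder and
# comes back out as an untouched macro call for sqlmesh to evaluate later.
evaluated = run_intermediate_macro_evaluator(
    "select @concat_macro(@prefix, 'events'), @runtime_only_macro('day') from test",
    registry,
    variables={"prefix": "dev_"},
)
assert evaluated is not None
assert qualify(evaluated[0]) == qualify(
    parse_one("select CONCAT('dev_', 'events'), @runtime_only_macro('day') from test")
)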
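
create_unregistered_macro in models.py serializes a plain Python callable into a sqlmesh Executable and can also emit thin wrapper functions for aliases, which is how factory macros can be exposed under alternate names (as with the (metrics_name, ["metric_name"]) pairs). A small sketch under the assumption of a hypothetical module-level macro named metrics_alias_demo:

from sqlglot import exp
from sqlmesh.core.macros import MacroEvaluator

from metrics_tools.models import create_unregistered_macro


def metrics_alias_demo(evaluator: MacroEvaluator, value: exp.Expression):
    # Hypothetical macro body used only for this sketch.
    return exp.Upper(this=value)


# Registers the macro under its own name plus an alias; the alias is a small
# generated wrapper that forwards to the original callable.
registry = create_unregistered_macro(metrics_alias_demo, aliases=["upper_demo"])
print(sorted(registry))  # ['metrics_alias_demo', 'upper_demo']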
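
The JoinerTransform rewrites an artifact-grained aggregation into a project- or collection-grained one by joining through artifacts_by_project_v1 (and projects_by_collection_v1 for collections) and re-keying the aggregated column. A sketch that reproduces the basic fixture pair shipped with the joiner tests:

from metrics_tools.joiner import joiner_transform
from metrics_tools.utils import assert_same_sql

query = (
    "select events.to_artifact_id, SUM(events.amount) as amount "
    "from metrics.events_daily_to_artifact as events "
    "group by events.to_artifact_id"
)

# "artifact" is a pass-through; "project" gains an inner join and the
# grouping key becomes to_project_id.
project = joiner_transform(query, "project")
assert project is not None
assert_same_sql(
    project[0],
    """
    select artifacts_by_project_v1.project_id as to_project_id,
        SUM(events.amount) as amount
    from metrics.events_daily_to_artifact as events
        inner join @oso_source.artifacts_by_project_v1
            on events.to_artifact_id = artifacts_by_project_v1.artifact_id
    group by artifacts_by_project_v1.project_id
    """,
)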
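
SQLTransformer itself is a thin pipeline: parse the query, qualify it unless disabled, then apply each Transform in order, so a new transform only has to implement __call__ over a list of expressions. A toy transform to show the shape of that contract; UppercaseTableTransform is purely illustrative and not part of the patch:

import typing as t

from sqlglot import exp

from metrics_tools.transformer import SQLTransformer, Transform


class UppercaseTableTransform(Transform):
    """Toy transform for this sketch: upper-cases every table name."""

    def __call__(self, query: t.List[exp.Expression]) -> t.List[exp.Expression]:
        def _upper(node: exp.Expression):
            if isinstance(node, exp.Table) and isinstance(node.this, exp.Identifier):
                node.this.set("this", node.this.this.upper())
            return node

        return [expression.transform(_upper) for expression in query]


transformer = SQLTransformer(transforms=[UppercaseTableTransform()])
result = transformer.transform(
    "select to_artifact_id, SUM(amount) as amount "
    "from events_daily_to_artifact group by to_artifact_id"
)
# Qualification runs first, then the custom transform renames the table.
print(result[0].sql(pretty=True))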
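
MetricsRunner.commit materializes results by building DuckDB DDL from the result dataframe's dtypes via generate_duckdb_create_table, defaulting unknown dtypes to TEXT. A standalone sketch; the table name is arbitrary:

import pandas as pd

from metrics_tools.runner import generate_duckdb_create_table

df = pd.DataFrame(
    {
        "metric": ["STARS"],                                   # object  -> TEXT
        "amount": [3.0],                                       # float64 -> DOUBLE
        "metrics_sample_date": pd.to_datetime(["2024-10-01"]), # datetime64[ns] -> TIMESTAMP
    }
)

# Prints a CREATE TABLE IF NOT EXISTS statement with columns
# metric TEXT, amount DOUBLE, metrics_sample_date TIMESTAMP.
print(generate_duckdb_create_table(df, "metrics_sketch.stars_daily"))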
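
The change to is_same_sql in oso_dagster/cbt/utils/compare.py re-parses both expressions and runs them through qualify and normalize before diffing, so predicates that are boolean-equivalent but written in different shapes should compare as equal. A hedged sketch; the exact rewrite depends on sqlglot's normalizer producing the same conjunctive normal form for both inputs:

import sqlglot as sql

from oso_dagster.cbt.utils.compare import is_same_sql

# (a AND b) OR c is the distributed form of (a OR c) AND (b OR c); after
# normalization both WHERE clauses should end up identical.
a = sql.parse_one("select x from t where (a and b) or c")
b = sql.parse_one("select x from t where (a or c) and (b or c)")
print(is_same_sql(a, b))  # expected True, assuming both normalize to the same CNF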