From 100ea21c17929a58198a0ba862efee2f5d834b12 Mon Sep 17 00:00:00 2001 From: Donald Nguyen Date: Thu, 23 May 2024 21:57:02 -0400 Subject: [PATCH] Use re2 instead of re for match expressions CEL specifies that match should use re2 pattern matching and not re semantics. --- README.rst | 15 ++++++++ poetry.lock | 82 ++++++++++++++++++++++++++++++++++++++-- pyproject.toml | 5 +++ src/celpy/evaluation.py | 31 +++++++++++++-- tests/test_evaluation.py | 20 ++++++++++ 5 files changed, 147 insertions(+), 6 deletions(-) diff --git a/README.rst b/README.rst index 64625c5..6a443a6 100644 --- a/README.rst +++ b/README.rst @@ -37,6 +37,21 @@ Installation You now have the CEL run-time available to Python-based applications. + +re2 +--- + +CEL specifies that regular expressions use re2 syntax, +https://github.com/google/re2/wiki/Syntax. To keep its dependencies minimal and +this implementation easily embeddable, cel-python uses the Python standard +library ``re`` syntax by default. If a ``re2`` package is installed or the +``re2`` extra is provided, cel-python will use ``re2`` syntax instead. + +:: + + pip install cel-python[re2] + + Command Line ============ diff --git a/poetry.lock b/poetry.lock index c358a04..10bc119 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. [[package]] name = "alabaster" @@ -321,6 +321,80 @@ docs = ["furo (>=2023.9.10)", "sphinx (>=7.2.6)", "sphinx-autodoc-typehints (>=1 testing = ["covdefaults (>=2.3)", "coverage (>=7.3.2)", "diff-cover (>=8.0.1)", "pytest (>=7.4.3)", "pytest-cov (>=4.1)", "pytest-mock (>=3.12)", "pytest-timeout (>=2.2)"] typing = ["typing-extensions (>=4.8)"] +[[package]] +name = "google-re2" +version = "1.1.20240501" +description = "RE2 Python bindings" +optional = false +python-versions = "~=3.8" +files = [ + {file = "google_re2-1.1.20240501-1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:fd5bd3e21fcedd0489a9cc749857544494eb232f78b5f59041951aac901cf0d2"}, + {file = "google_re2-1.1.20240501-1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:89579c162bc6278663c6fce05058bba85559f7ae6a2fea0b35469c6e185c77e3"}, + {file = "google_re2-1.1.20240501-1-cp310-cp310-macosx_13_0_arm64.whl", hash = "sha256:e716decd68b8bc52fe6de77d56c898f7885e415ec41f5a2ce47d831deb09c614"}, + {file = "google_re2-1.1.20240501-1-cp310-cp310-macosx_13_0_x86_64.whl", hash = "sha256:76cda7ad02b7378dd18d2becbfdc993468be38a7bbd3de6e985f6fe8b711e7db"}, + {file = "google_re2-1.1.20240501-1-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:13442fbe09b21f30b87cd8024285238416318851a7b4f83bda277076c96bb041"}, + {file = "google_re2-1.1.20240501-1-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:f30c8557e4f3e12b955730b3ad65f327a06ca228c2f696fee0140a07cd126cd8"}, + {file = "google_re2-1.1.20240501-1-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:75d08a0a5eac9a16abfb92afdc55370a6868ffd2db5853a7c04cf683a3482023"}, + {file = "google_re2-1.1.20240501-1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9a21d18f749cfe7037c356a7556c727249e5a7be0af633411895edb5b7fcf80a"}, + {file = "google_re2-1.1.20240501-1-cp310-cp310-win32.whl", hash = "sha256:0740c7608db8944ed16ca9eadccb6322c82d59581019e4b3dc7ca09c4f9d260d"}, + {file = "google_re2-1.1.20240501-1-cp310-cp310-win_amd64.whl", hash = "sha256:4af422cf956cfc106a87fbd560573aa77a787a9286d7b3119c65b07c8ae51d20"}, + {file = "google_re2-1.1.20240501-1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:347c31ff0100a740477ef89c9ed3d5d1f8afaefe66d3b4cb584be74f2db142a2"}, + {file = "google_re2-1.1.20240501-1-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:7f218b3c30ca04616466ab64e74e87aa6885a81daeafed487d56e32e48aa56c8"}, + {file = "google_re2-1.1.20240501-1-cp311-cp311-macosx_13_0_arm64.whl", hash = "sha256:274e3555d0d2f086906db3971e5b975353802fe8410a7d0e7cf7fd58627501da"}, + {file = "google_re2-1.1.20240501-1-cp311-cp311-macosx_13_0_x86_64.whl", hash = "sha256:90ccb99c40d434e6c6376dbf40cb6415f0e6cb3f3d8f44ff851c7955559dfe2f"}, + {file = "google_re2-1.1.20240501-1-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:65366d7e56da1599cc5e424bb1c12a75e40ec2e5e4a14ced158403fd8c8bace9"}, + {file = "google_re2-1.1.20240501-1-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:000aba6721c53485736bc8d9c4584bf62207aed1669658deea527f7a6cc919b6"}, + {file = "google_re2-1.1.20240501-1-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1be71c0bb8296e146c6e676a7ee771e0c7d2a6d60822c7eaf62ac3a1a8ab48a2"}, + {file = "google_re2-1.1.20240501-1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a7f11e962e7bac5245c0d7ee5780f8fb0f0f0b98c3bfb0ec47f3b0adf3e584d7"}, + {file = "google_re2-1.1.20240501-1-cp311-cp311-win32.whl", hash = "sha256:af730d04792cb72f34adc60d42d592f10342f76e88f34f528c27f3547e82d7ec"}, + {file = "google_re2-1.1.20240501-1-cp311-cp311-win_amd64.whl", hash = "sha256:25fad9f0c10c5ea73ca44de2b42dd6eec280e92ab0cb6ec9ba8582789b494964"}, + {file = "google_re2-1.1.20240501-1-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:7ab8af0abbffa5b1ac06f243fbb1fc8d6fb4b371863c923784bc1a71cb19ba13"}, + {file = "google_re2-1.1.20240501-1-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:b44697ee28aa887eb7620db2a12d1f4812dc1366eeaaf83804f856d9aac4c404"}, + {file = "google_re2-1.1.20240501-1-cp312-cp312-macosx_13_0_arm64.whl", hash = "sha256:b222b4f1b529a3a54111d22805b1ed51da9cc1bbc9d3bd93f89a87706f13070c"}, + {file = "google_re2-1.1.20240501-1-cp312-cp312-macosx_13_0_x86_64.whl", hash = "sha256:fab2be9fc8b1fa66be445971cc848a679a5ea64e11a2eabd484cb67adbc4a9fa"}, + {file = "google_re2-1.1.20240501-1-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:3015b89b24bca120197e246f102ae363a53059704cabac6444f4be7fe99fcced"}, + {file = "google_re2-1.1.20240501-1-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:055e79d79318813cea181b3e4111b4bf3113282d5591f6fc0e65a2a864c7ac1c"}, + {file = "google_re2-1.1.20240501-1-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6373de530c8474d7760b8a597432222037c5891530f3df70e92d2f53b2193a5a"}, + {file = "google_re2-1.1.20240501-1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:296c9c47d506109d40fb5ff8a68f9a382cd67b43eadfb88af19fdff6815208f6"}, + {file = "google_re2-1.1.20240501-1-cp312-cp312-win32.whl", hash = "sha256:86c648f134af49df588c99960e6f95cf7309afff1f596d677786308db64fe297"}, + {file = "google_re2-1.1.20240501-1-cp312-cp312-win_amd64.whl", hash = "sha256:797ae8a630aa0a4de895e1055f2ee4dfdb2a9ee2762cc4c0c316955ba270a324"}, + {file = "google_re2-1.1.20240501-1-cp38-cp38-macosx_12_0_arm64.whl", hash = "sha256:e26ecdd5bc1b522b6d47e59e990d22562211f4a7da63c72ed17e75e903fd200b"}, + {file = "google_re2-1.1.20240501-1-cp38-cp38-macosx_12_0_x86_64.whl", hash = "sha256:1d676f94560ec09fcd838492afffb577f289c8032af02347a65f635bb3195763"}, + {file = "google_re2-1.1.20240501-1-cp38-cp38-macosx_13_0_arm64.whl", hash = "sha256:a1b4dd5f1f41a88db584d89c9a1e275180908321049eb4beda00b53fa6038cc6"}, + {file = "google_re2-1.1.20240501-1-cp38-cp38-macosx_13_0_x86_64.whl", hash = "sha256:7f7fc9df2e9fee161edccd8ef6fb5a6aa81b782fe2a5d13ed203a3e3f04eeb42"}, + {file = "google_re2-1.1.20240501-1-cp38-cp38-macosx_14_0_arm64.whl", hash = "sha256:01d1adb2e1c1cea3c481acff439ffc3a1aedc8c7094c691df23150846bf87e7a"}, + {file = "google_re2-1.1.20240501-1-cp38-cp38-macosx_14_0_x86_64.whl", hash = "sha256:5f0e17a4e6a3e88a2f57ad9f34150e16efaad08bfdee6e5b2768204a9ec88ba3"}, + {file = "google_re2-1.1.20240501-1-cp38-cp38-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9f972a8f70cc655a0cd4a1c88aba1852a4717a35a5a3028412b073b85697e829"}, + {file = "google_re2-1.1.20240501-1-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:68a5cf3ccffef6e8f728972fe1f0a37cfff9c149a2d36960b46940ac8626c4d8"}, + {file = "google_re2-1.1.20240501-1-cp38-cp38-win32.whl", hash = "sha256:f427320bd1e56e9aab24a6c4a100a83824a1afa111a22ffdaf8f7340b0efe4ec"}, + {file = "google_re2-1.1.20240501-1-cp38-cp38-win_amd64.whl", hash = "sha256:4593e5a4e0da713cea1d6fb9b561ab46aee32c3da16743f3cc3be7c409ef7efe"}, + {file = "google_re2-1.1.20240501-1-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:a0959955cb3b6e7b4d27f65b9fa2b5900a2f0c7b08cfc1855f0991559ac885f4"}, + {file = "google_re2-1.1.20240501-1-cp39-cp39-macosx_12_0_x86_64.whl", hash = "sha256:6b2b3f43bd1c031103aa552a6e76befebd255f12a74cda622b9a7353c9530f6e"}, + {file = "google_re2-1.1.20240501-1-cp39-cp39-macosx_13_0_arm64.whl", hash = "sha256:fd010dcd3416d47a8f3c86cec2f9ee0840fd92ac92de3d00af432202377eb6c5"}, + {file = "google_re2-1.1.20240501-1-cp39-cp39-macosx_13_0_x86_64.whl", hash = "sha256:55ee5e382b6c11298fad1e4d929ec3fd06c5bea1324166810b2752dda2db51b2"}, + {file = "google_re2-1.1.20240501-1-cp39-cp39-macosx_14_0_arm64.whl", hash = "sha256:c9f5ebd22eced203f7099fa9e84897da759008f170d618dca488595fe7720c87"}, + {file = "google_re2-1.1.20240501-1-cp39-cp39-macosx_14_0_x86_64.whl", hash = "sha256:c5a9e44aebabb501fd26de8a3db3517730b3952ddf8030d620b8c13adb4cb910"}, + {file = "google_re2-1.1.20240501-1-cp39-cp39-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4070385f8a273fc2db1d62e14a612c5d0160b655ffae39d3a790f3d058e2c48c"}, + {file = "google_re2-1.1.20240501-1-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d09049ddef786feeec8d246c45722bc97e5ef5e3a47dd3fd842d12bded2c86c5"}, + {file = "google_re2-1.1.20240501-1-cp39-cp39-win32.whl", hash = "sha256:4f50028b81fe4e2546837adc2c2b28bc7161a6df34ed5ce810d6e33012138858"}, + {file = "google_re2-1.1.20240501-1-cp39-cp39-win_amd64.whl", hash = "sha256:6f48a6a4f88b60444ec7d045e99cffcff40508aca61635f76009916a5b409304"}, + {file = "google_re2-1.1.20240501.tar.gz", hash = "sha256:1a291ae834ec4230c71671b0162999ec76c0b88fbd33ab4d362a07fa9a7d37b4"}, +] + +[[package]] +name = "google-re2-stubs" +version = "0.1.0" +description = "Typestubs for google-re2" +optional = false +python-versions = "<4.0.0,>=3.8.0" +files = [ + {file = "google_re2_stubs-0.1.0-py3-none-any.whl", hash = "sha256:fa5099ea36a4f1d73f50b39bf09485a9f038d7b6f3f72acc9826fd3b5fa1292f"}, + {file = "google_re2_stubs-0.1.0.tar.gz", hash = "sha256:aedcc254aae9c05bbb2a08aada2ee16e17e88492d3317723b589f1c111988e8e"}, +] + +[package.dependencies] +google-re2 = ">=1.0" + [[package]] name = "identify" version = "2.5.36" @@ -800,7 +874,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -1167,7 +1240,10 @@ files = [ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] testing = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"] +[extras] +re2 = ["google-re2"] + [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "de443df6e65881342b7a829e72769555297369bce7a46b9bd7499360b844b0cd" +content-hash = "6271564a493c3e2d6a7b5e21452f7ab0b4d6796588279f1efe8bed425f379c83" diff --git a/pyproject.toml b/pyproject.toml index 50542d1..b65fa24 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,10 @@ python-dateutil = "^2.9.0.post0" pyyaml = "^6.0.1" types-pyyaml = "^6.0.12.20240311" types-python-dateutil = "^2.9.0.20240316" +google-re2 = { version = "^1.0", optional = true } + +[tool.poetry.extras] +re2 = ["google-re2"] [tool.poetry.group.dev.dependencies] behave = "^1.2.6" @@ -55,6 +59,7 @@ pytest-cov = "^5.0.0" sphinx = "^6.0" tox = "^4.15.0" pre-commit = "^3.5" +google-re2-stubs = "^0.1.0" [build-system] requires = ["poetry-core"] diff --git a/src/celpy/evaluation.py b/src/celpy/evaluation.py index 47caf9b..f70469c 100644 --- a/src/celpy/evaluation.py +++ b/src/celpy/evaluation.py @@ -51,6 +51,12 @@ import celpy.celtypes from celpy.celparser import tree_dump +_USE_RE2 = True +try: + import re2 +except ImportError: + _USE_RE2 = False + # A CEL type annotation. Used in an environment to describe objects as well as functions. # This is a list of types, plus Callable for conversion functions. Annotation = Union[ @@ -59,10 +65,8 @@ Type[celpy.celtypes.FunctionType], # Concrete class for annotations ] - logger = logging.getLogger("evaluation") - class CELSyntaxError(Exception): """CEL Syntax error -- the AST did not have the expected structure.""" def __init__(self, arg: Any, line: Optional[int] = None, column: Optional[int] = None) -> None: @@ -293,6 +297,27 @@ def operator_in(item: Result, container: Result) -> Result: return result +def _function_matches_re(text: str, pattern: str) -> Result: + try: + m = re.search(pattern, text) + except re.error as ex: + return CELEvalError("match error", ex.__class__, ex.args) + + return celpy.celtypes.BoolType(m is not None) + + +def _function_matches_re2(text: str, pattern: str) -> Result: + try: + m = re2.search(pattern, text) + except re2.error as ex: + return CELEvalError("match error", ex.__class__, ex.args) + + return celpy.celtypes.BoolType(m is not None) + + +function_matches = _function_matches_re2 if _USE_RE2 else _function_matches_re + + def function_size(container: Result) -> Result: """ The size() function applied to a Value. Delegate to Python's :py:func:`len`. @@ -340,7 +365,7 @@ def function_size(container: Result) -> Result: # StringType methods "endsWith": lambda s, text: celpy.celtypes.BoolType(s.endswith(text)), "startsWith": lambda s, text: celpy.celtypes.BoolType(s.startswith(text)), - "matches": lambda s, pattern: celpy.celtypes.BoolType(re.search(pattern, s) is not None), + "matches": function_matches, "contains": lambda s, text: celpy.celtypes.BoolType(text in s), # TimestampType methods. Type details are redundant, but required because of the lambdas "getDate": lambda ts, tz_name=None: celpy.celtypes.IntType(ts.getDate(tz_name)), diff --git a/tests/test_evaluation.py b/tests/test_evaluation.py index 0d361fc..68c9f3d 100644 --- a/tests/test_evaluation.py +++ b/tests/test_evaluation.py @@ -37,6 +37,7 @@ import celpy.evaluation # For monkeypatching from celpy import celparser, celtypes from celpy.evaluation import * +from celpy.evaluation import _function_matches_re, _function_matches_re2 def test_exception_syntax_error(): @@ -158,6 +159,25 @@ def test_operator_in(): assert isinstance(operator_in(celtypes.IntType(-1), container_2), CELEvalError) +def test_function_matches_re2(): + empty_string = celtypes.StringType("") + # re2-specific patterns which behave differently than standard re + assert _function_matches_re2(empty_string, "^\\z") + assert isinstance(_function_matches_re2(empty_string, "^\\Z"), CELEvalError) + + +def test_function_matches_re(): + empty_string = celtypes.StringType("") + # re2-specific patterns which behave differently than standard re + assert isinstance(_function_matches_re(empty_string, "^\\z"), CELEvalError) + assert _function_matches_re(empty_string, "^\\Z") + + +def test_function_matches(): + empty_string = celtypes.StringType("") + assert function_matches(empty_string, "^$") + + def test_function_size(): container_1 = celtypes.ListType([ celtypes.IntType(42),