diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0b90645c..d0998f27 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -14,7 +14,7 @@ jobs: strategy: matrix: python-version: - - "3.7" + - "3.8" steps: - uses: actions/checkout@v2.3.4 - name: Set up Python ${{ matrix.python-version }} @@ -42,11 +42,11 @@ jobs: strategy: matrix: python-version: - - "3.7" - "3.8" - "3.9" - "3.10" - "3.11" + - "3.12" steps: - uses: actions/checkout@v2.3.4 - name: Set up Python ${{ matrix.python-version }} @@ -64,7 +64,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.7"] + python-version: ["3.11"] test-program: [snakemake, miniwdl] steps: - uses: actions/checkout@v2.3.4 diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 00000000..69f1d937 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,13 @@ +version: 2 + +build: + os: ubuntu-22.04 + tools: + python: "3.11" + +sphinx: + configuration: docs/conf.py + +python: + install: + - requirements: requirements-docs.txt diff --git a/HISTORY.rst b/HISTORY.rst index d05d7d92..e1f5c312 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -2,11 +2,24 @@ Changelog ========== + .. Newest changes should be on top. .. This document is user facing. Please word the changes in such a way .. that users understand how the changes affect the new version. +version 2.1.0 +--------------------------- ++ Python version 3.7 support is dropped because it is deprecated. Python + version 3.12 was added. ++ Fixed a bug where pytest 8.1+ would raise a ``PluginValidationError`` because + the hook ``pytest_collect_file()`` has finally dropped the deprecated + argument ``path`` from its specification. ++ Add extract_md5sum check on uncompressed contents of compressed output files. + Gzipped files contain a timestamp which makes it hard to directly compare the + md5sums of gzipped files. 
++ Document naming conventions for Python test discovery. + version 2.0.1 --------------------------- + Fixed a bug where pytest-workflow would crash on logs that used non-ASCII diff --git a/README.rst b/README.rst index c53eb866..3e19015c 100644 --- a/README.rst +++ b/README.rst @@ -127,6 +127,7 @@ predefined tests as well as custom tests are possible. - path: "TomCruise.txt.gz" # Gzipped files can also be searched, provided their extension is '.gz' contains: - "starring" + extract_md5sum: e27c52f6b5f8152aa3ef58be7bdacc4d # Md5sum of the uncompressed file (optional) stderr: # Options for testing stderr (optional) contains: # A list of strings which should be in stderr (optional) - "BSOD error, please contact the IT crowd" diff --git a/docs/writing_tests.rst b/docs/writing_tests.rst index 9d75d435..0a3896d1 100644 --- a/docs/writing_tests.rst +++ b/docs/writing_tests.rst @@ -64,6 +64,7 @@ Test options - path: "TomCruise.txt.gz" # Gzipped files can also be searched, provided their extension is '.gz' contains: - "starring" + extract_md5sum: e27c52f6b5f8152aa3ef58be7bdacc4d # Md5sum of the uncompressed file (optional) stderr: # Options for testing stderr (optional) contains: # A list of strings which should be in stderr (optional) - "BSOD error, please contact the IT crowd" @@ -89,6 +90,12 @@ Please see the `Python documentation on regular expressions <https://docs.python.org/3/library/re.html>`_ to see how Python handles escape sequences. +The ``extract_md5sum`` option is used to uncompress a file and then compare +the md5sum of the uncompressed file with the supplied md5sum. This option is +particularly useful when testing gzipped files, which may contain a file +creation timestamp in the gzip header. The supported compressed file +formats for this option are gzip, bzip2, xz and Zstandard. + .. note:: Workflow names must be unique. Pytest workflow will crash when multiple workflows have the same name, even if they are in different files. 
@@ -160,6 +167,10 @@ Multiple workflows can use the same custom test like this: points to the folder where the named workflow was executed. This allows writing of advanced python tests for each file produced by the workflow. +Custom tests must follow the `conventions for Python test discovery +<https://docs.pytest.org/en/stable/explanation/goodpractices.html#conventions-for-python-test-discovery>`_, +which constrain the names of files and functions containing custom tests. + .. note:: stdout and stderr are available as files in the root of the diff --git a/requirements.txt b/requirements.txt index 2c5d3bff..884f5ec3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ pyyaml pytest>=7.0.0 -jsonschema \ No newline at end of file +jsonschema +xopen>=1.7.0 +zstandard diff --git a/setup.py b/setup.py index 015d52ba..fcf4306e 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ setup( name="pytest-workflow", - version="2.0.1", + version="2.1.0", description="A pytest plugin for configuring workflow/pipeline tests " "using YAML files", author="Leiden University Medical Center", @@ -39,22 +39,24 @@ classifiers=[ "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: " "GNU Affero General Public License v3 or later (AGPLv3+)", "Framework :: Pytest", ], - # Because we cannot test anymore on Python 3.6. - python_requires=">=3.7", + # Because we cannot test anymore on Python 3.7. + python_requires=">=3.8", install_requires=[ "pytest>=7.0.0", # To use pathlib Path's in pytest "pyyaml", - "jsonschema" + "jsonschema", + "xopen>=1.7.0", + "zstandard", ], # This line makes sure the plugin is automatically loaded when it is # installed in the same environment as pytest. 
No need to configure diff --git a/src/pytest_workflow/file_tests.py b/src/pytest_workflow/file_tests.py index 17642fc8..1f98b1c2 100644 --- a/src/pytest_workflow/file_tests.py +++ b/src/pytest_workflow/file_tests.py @@ -22,7 +22,7 @@ from .content_tests import ContentTestCollector from .schema import FileTest -from .util import file_md5sum +from .util import extract_md5sum, file_md5sum from .workflow import Workflow @@ -76,7 +76,16 @@ def collect(self): parent=self, filepath=filepath, md5sum=self.filetest.md5sum, - workflow=self.workflow)] + workflow=self.workflow, + extract=False)] + + if self.filetest.extract_md5sum: + tests += [FileMd5.from_parent( + parent=self, + filepath=filepath, + md5sum=self.filetest.extract_md5sum, + workflow=self.workflow, + extract=True)] return tests @@ -119,20 +128,22 @@ def repr_failure(self, excinfo, style=None): class FileMd5(pytest.Item): def __init__(self, parent: pytest.Collector, filepath: Path, - md5sum: str, workflow: Workflow): + md5sum: str, workflow: Workflow, extract: bool): """ Create a tests for the file md5sum. :param parent: The collector that started this item :param filepath: The path to the file :param md5sum: The expected md5sum :param workflow: The workflow running to generate the file + :param extract: Whether to decompress the file before hashing it """ - name = "md5sum" + name = "extract_md5sum" if extract else "md5sum" super().__init__(name, parent) self.filepath = filepath self.expected_md5sum = md5sum self.observed_md5sum = None self.workflow = workflow + self.extract = extract def runtest(self): # Wait for the workflow to finish before we check the md5sum of a file. 
@@ -140,11 +151,14 @@ def runtest(self): if not self.workflow.matching_exitcode(): pytest.skip(f"'{self.parent.workflow.name}' did not exit with" f"desired exit code.") - self.observed_md5sum = file_md5sum(self.filepath) + sum_func = extract_md5sum if self.extract else file_md5sum + self.observed_md5sum = sum_func(self.filepath) assert self.observed_md5sum == self.expected_md5sum def repr_failure(self, excinfo, style=None): + metric = "extract_md5sum" if self.extract else "md5sum" return ( - f"Observed md5sum '{self.observed_md5sum}' not equal to expected " - f"md5sum '{self.expected_md5sum}' for file '{self.filepath}'" - ) + f"Observed {metric} '{self.observed_md5sum}' not equal to " + f"expected {metric} '{self.expected_md5sum}' for file " + f"'{self.filepath}'" + ) diff --git a/src/pytest_workflow/plugin.py b/src/pytest_workflow/plugin.py index 1e012d38..142c715b 100644 --- a/src/pytest_workflow/plugin.py +++ b/src/pytest_workflow/plugin.py @@ -117,11 +117,12 @@ def addoption(self, *args, **kwargs): return parser -def pytest_collect_file(file_path, path, parent): +def pytest_collect_file(file_path, parent): """Collection hook This collects the yaml files that start with "test" and end with .yaml or .yml""" - if path.ext in [".yml", ".yaml"] and path.basename.startswith("test"): + if (file_path.suffix in [".yml", ".yaml"] and + file_path.name.startswith("test")): return YamlFile.from_parent(parent, path=file_path) return None diff --git a/src/pytest_workflow/schema.py b/src/pytest_workflow/schema.py index c8c3e40f..499dd416 100644 --- a/src/pytest_workflow/schema.py +++ b/src/pytest_workflow/schema.py @@ -125,6 +125,7 @@ def __init__(self, contains: Optional[List[str]] = None, class FileTest(ContentTest): """A class that contains all the properties of a to be tested file.""" def __init__(self, path: str, md5sum: Optional[str] = None, + extract_md5sum: Optional[str] = None, should_exist: bool = DEFAULT_FILE_SHOULD_EXIST, contains: Optional[List[str]] = None, 
must_not_contain: Optional[List[str]] = None, @@ -135,6 +136,7 @@ def __init__(self, path: str, md5sum: Optional[str] = None, A container object :param path: the path to the file :param md5sum: md5sum of the file contents + :param extract_md5sum: md5sum of the extracted file contents :param should_exist: whether the file should exist or not :param contains: a list of strings that should be present in the file :param must_not_contain: a list of strings that should not be present @@ -150,6 +152,7 @@ def __init__(self, path: str, md5sum: Optional[str] = None, encoding=encoding) self.path = Path(path) self.md5sum = md5sum + self.extract_md5sum = extract_md5sum self.should_exist = should_exist diff --git a/src/pytest_workflow/schema/schema.json b/src/pytest_workflow/schema/schema.json index 9ead66bb..718b6c25 100644 --- a/src/pytest_workflow/schema/schema.json +++ b/src/pytest_workflow/schema/schema.json @@ -123,6 +123,10 @@ "should_exist": { "type": "boolean" }, + "extract_md5sum": { + "type": "string", + "pattern": "^[a-f0-9]{32}$" + }, "contains": { "type": "array", "items": { diff --git a/src/pytest_workflow/util.py b/src/pytest_workflow/util.py index a7c91bc0..beed52af 100644 --- a/src/pytest_workflow/util.py +++ b/src/pytest_workflow/util.py @@ -7,7 +7,10 @@ import sys import warnings from pathlib import Path -from typing import Callable, Iterator, List, Optional, Set, Tuple, Union +from typing import Callable, IO, Iterator, List, Optional, Set, Tuple, Union, \ + cast + +from xopen import xopen Filepath = Union[str, os.PathLike] @@ -204,10 +207,32 @@ def file_md5sum(filepath: Path, block_size=64 * 1024) -> str: :param block_size: Block size in bytes :return: a md5sum as hexadecimal string. 
""" - hasher = hashlib.md5() with filepath.open('rb') as file_handler: # Read the file in bytes - for block in iter(lambda: file_handler.read(block_size), b''): - hasher.update(block) + return file_handle_md5sum(file_handler, block_size) + + +def extract_md5sum(filepath: Path, block_size=64 * 1024) -> str: + """ + Generates a md5sum for the uncompressed contents of compressed file. + Reads file in blocks to save memory. + :param filepath: a pathlib. Path to the compressed file + :param block_size: Block size in bytes + :return: a md5sum as hexadecimal string. + """ + with xopen(filepath, 'rb') as file_handler: # Read the file in bytes + return file_handle_md5sum(cast(IO[bytes], file_handler), block_size) + + +def file_handle_md5sum(file_handler: IO[bytes], block_size) -> str: + """ + Generates a md5sum for a file handle. Reads file in blocks to save memory. + :param file_handler: a readable binary file handler + :param block_size: Block size in bytes + :return: a md5sum as hexadecimal string. + """ + hasher = hashlib.md5() + for block in iter(lambda: file_handler.read(block_size), b''): + hasher.update(block) return hasher.hexdigest() diff --git a/tests/functional/simple_snakefile_test_cases.yml b/tests/functional/simple_snakefile_test_cases.yml index 0ca2f666..9ec26e21 100644 --- a/tests/functional/simple_snakefile_test_cases.yml +++ b/tests/functional/simple_snakefile_test_cases.yml @@ -1,26 +1,26 @@ - name: test-dry-run - command: snakemake -n -r -p -s SimpleSnakefile --config N_LINES_TO_READ=1 + command: snakemake -n -p -s SimpleSnakefile --config N_LINES_TO_READ=1 - name: test-config-missing - command: snakemake -n -r -p -s SimpleSnakefile + command: snakemake -n -p -s SimpleSnakefile exit_code: 1 - stdout: + stdout: # Dry run output should be stdout. See https://github.com/snakemake/snakemake/issues/2757 contains: - "You must set --config N_LINES_TO_READ=." 
- name: test-config-wrong-type - command: snakemake -n -r -p -s SimpleSnakefile --config N_LINES_TO_READ=one + command: snakemake -n -p -s SimpleSnakefile --config N_LINES_TO_READ=one exit_code: 1 stdout: contains: - "N_LINES_TO_READ must be an integer." - name: test-config-invalid-value - command: snakemake -n -r -p -s SimpleSnakefile --config N_LINES_TO_READ=-1 + command: snakemake -n -p -s SimpleSnakefile --config N_LINES_TO_READ=-1 exit_code: 1 stdout: contains: - "N_LINES_TO_READ must at least be 1." - name: test-snakemake-run command: >- - snakemake --cores 1 -r -p -s SimpleSnakefile --config N_LINES_TO_READ=500 + snakemake --cores 1 -p -s SimpleSnakefile --config N_LINES_TO_READ=500 files: - path: rand/0.txt - path: rand/1.txt diff --git a/tests/test_schema.py b/tests/test_schema.py index 378be288..8defda21 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -171,6 +171,7 @@ def test_filetest_defaults(): assert file_test.contains_regex == [] assert file_test.must_not_contain_regex == [] assert file_test.md5sum is None + assert file_test.extract_md5sum is None assert file_test.should_exist diff --git a/tests/test_utils.py b/tests/test_utils.py index 45f789c3..574225bd 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -13,6 +13,7 @@ # # You should have received a copy of the GNU Affero General Public License # along with pytest-workflow. If not, see