From 4add25ae3364e4c8fd3b611566aaa898b8af2815 Mon Sep 17 00:00:00 2001
From: Ruben Vorderman <r.h.p.vorderman@lumc.nl>
Date: Fri, 13 Jan 2023 12:07:13 +0100
Subject: [PATCH 01/17] New version

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 015d52ba..d4642b64 100644
--- a/setup.py
+++ b/setup.py
@@ -20,7 +20,7 @@
 
 setup(
     name="pytest-workflow",
-    version="2.0.1",
+    version="2.1.0-dev",
     description="A pytest plugin for configuring workflow/pipeline tests "
                 "using YAML files",
     author="Leiden University Medical Center",

From f81a588f634cd9fe048ea2e79c7934e1cde210d2 Mon Sep 17 00:00:00 2001
From: Will Holtz <wholtz@gmail.com>
Date: Fri, 20 Jan 2023 00:49:35 -0800
Subject: [PATCH 02/17] Document test discovery naming conventions (#172)

* docs: conventions for test discovery

* docs: update HISTORY.rst

* add back end of file newline

* use version-independent URL
---
 HISTORY.rst            | 4 ++++
 docs/writing_tests.rst | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/HISTORY.rst b/HISTORY.rst
index d05d7d92..3a71c2b1 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -7,6 +7,10 @@ Changelog
 .. This document is user facing. Please word the changes in such a way
 .. that users understand how the changes affect the new version.
 
+version 2.1.0-dev
+---------------------------
+* Document naming conventions for Python test discovery
+
 version 2.0.1
 ---------------------------
 + Fixed a bug where pytest-workflow would crash on logs that used non-ASCII
diff --git a/docs/writing_tests.rst b/docs/writing_tests.rst
index 9d75d435..db09e66f 100644
--- a/docs/writing_tests.rst
+++ b/docs/writing_tests.rst
@@ -160,6 +160,10 @@ Multiple workflows can use the same custom test like this:
 points to the folder where the named workflow was executed. This allows writing
 of advanced python tests for each file produced by the workflow.
 
+Custom tests must follow the `conventions for Python test discovery
+<https://docs.pytest.org/en/latest/explanation/goodpractices.html#conventions-for-python-test-discovery>`_,
+which constrains the names of files and functions containing custom tests.
+
 .. note::
 
     stdout and stderr are available as files in the root of the

From ec8be3c6f83720047fafd7691bad924e225d9aac Mon Sep 17 00:00:00 2001
From: Will Holtz <wholtz@gmail.com>
Date: Wed, 21 Jun 2023 17:07:12 -0700
Subject: [PATCH 03/17] Add ungzip_md5sum check

---
 HISTORY.rst                       |  5 ++++-
 docs/writing_tests.rst            |  1 +
 src/pytest_workflow/file_tests.py | 30 ++++++++++++++++++++++--------
 src/pytest_workflow/schema.py     |  3 +++
 src/pytest_workflow/util.py       | 31 +++++++++++++++++++++++++++----
 tests/test_schema.py              |  1 +
 tests/test_utils.py               | 11 ++++++++++-
 7 files changed, 68 insertions(+), 14 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index 3a71c2b1..8b2b8ef0 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -9,7 +9,10 @@ Changelog
 
 version 2.1.0-dev
 ---------------------------
-* Document naming conventions for Python test discovery
++ Add md5sum checking on unzipped contents of gzipped output files. Gzipped
+  files contain a timestamp which makes it hard to directly compare the md5sums
+  of gzipped files.
++ Document naming conventions for Python test discovery
 
 version 2.0.1
 ---------------------------
diff --git a/docs/writing_tests.rst b/docs/writing_tests.rst
index db09e66f..0e11a7d7 100644
--- a/docs/writing_tests.rst
+++ b/docs/writing_tests.rst
@@ -64,6 +64,7 @@ Test options
       - path: "TomCruise.txt.gz"       # Gzipped files can also be searched, provided their extension is '.gz'
         contains:
           - "starring"
+        ungzip_md5sum: e27c52f6b5f8152aa3ef58be7bdacc4d   # Md5sum of the ungzipped file (optional)
     stderr:                            # Options for testing stderr (optional)
       contains:                        # A list of strings which should be in stderr (optional)
         - "BSOD error, please contact the IT crowd"
diff --git a/src/pytest_workflow/file_tests.py b/src/pytest_workflow/file_tests.py
index 17642fc8..5503154b 100644
--- a/src/pytest_workflow/file_tests.py
+++ b/src/pytest_workflow/file_tests.py
@@ -22,7 +22,7 @@
 
 from .content_tests import ContentTestCollector
 from .schema import FileTest
-from .util import file_md5sum
+from .util import file_md5sum, gzip_md5sum
 from .workflow import Workflow
 
 
@@ -76,7 +76,16 @@ def collect(self):
                 parent=self,
                 filepath=filepath,
                 md5sum=self.filetest.md5sum,
-                workflow=self.workflow)]
+                workflow=self.workflow,
+                ungzip=False)]
+
+        if self.filetest.ungzip_md5sum:
+            tests += [FileMd5.from_parent(
+                parent=self,
+                filepath=filepath,
+                md5sum=self.filetest.ungzip_md5sum,
+                workflow=self.workflow,
+                ungzip=True)]
 
         return tests
 
@@ -119,20 +128,22 @@ def repr_failure(self, excinfo, style=None):
 
 class FileMd5(pytest.Item):
     def __init__(self, parent: pytest.Collector, filepath: Path,
-                 md5sum: str, workflow: Workflow):
+                 md5sum: str, workflow: Workflow, ungzip: bool):
         """
         Create a tests for the file md5sum.
         :param parent: The collector that started this item
         :param filepath: The path to the file
         :param md5sum:  The expected md5sum
         :param workflow: The workflow running to generate the file
+        :param ungzip: Whether the file should be ungzipped before calculating
         """
-        name = "md5sum"
+        name = "unzip_md5sum" if ungzip else "md5sum"
         super().__init__(name, parent)
         self.filepath = filepath
         self.expected_md5sum = md5sum
         self.observed_md5sum = None
         self.workflow = workflow
+        self.ungzip = ungzip
 
     def runtest(self):
         # Wait for the workflow to finish before we check the md5sum of a file.
@@ -140,11 +151,14 @@ def runtest(self):
         if not self.workflow.matching_exitcode():
             pytest.skip(f"'{self.parent.workflow.name}' did not exit with"
                         f"desired exit code.")
-        self.observed_md5sum = file_md5sum(self.filepath)
+        sum_func = gzip_md5sum if self.ungzip else file_md5sum
+        self.observed_md5sum = sum_func(self.filepath)
         assert self.observed_md5sum == self.expected_md5sum
 
     def repr_failure(self, excinfo, style=None):
+        metric = "ungzip_md5sum" if self.ungzip else "md5sum"
         return (
-            f"Observed md5sum '{self.observed_md5sum}' not equal to expected "
-            f"md5sum '{self.expected_md5sum}' for file '{self.filepath}'"
-        )
+            f"Observed {metric} '{self.observed_md5sum}' not equal to "
+            f"expected {metric} '{self.expected_md5sum}' for file "
+            f"'{self.filepath}'"
+         )
diff --git a/src/pytest_workflow/schema.py b/src/pytest_workflow/schema.py
index c8c3e40f..ed74b6c3 100644
--- a/src/pytest_workflow/schema.py
+++ b/src/pytest_workflow/schema.py
@@ -125,6 +125,7 @@ def __init__(self, contains: Optional[List[str]] = None,
 class FileTest(ContentTest):
     """A class that contains all the properties of a to be tested file."""
     def __init__(self, path: str, md5sum: Optional[str] = None,
+                 ungzip_md5sum: Optional[str] = None,
                  should_exist: bool = DEFAULT_FILE_SHOULD_EXIST,
                  contains: Optional[List[str]] = None,
                  must_not_contain: Optional[List[str]] = None,
@@ -135,6 +136,7 @@ def __init__(self, path: str, md5sum: Optional[str] = None,
         A container object
         :param path: the path to the file
         :param md5sum: md5sum of the file contents
+        :param unzip_md5sum: md5sum of the unzipped file contents
         :param should_exist: whether the file should exist or not
         :param contains: a list of strings that should be present in the file
         :param must_not_contain: a list of strings that should not be present
@@ -150,6 +152,7 @@ def __init__(self, path: str, md5sum: Optional[str] = None,
                          encoding=encoding)
         self.path = Path(path)
         self.md5sum = md5sum
+        self.ungzip_md5sum = ungzip_md5sum
         self.should_exist = should_exist
 
 
diff --git a/src/pytest_workflow/util.py b/src/pytest_workflow/util.py
index a7c91bc0..a91c9b5c 100644
--- a/src/pytest_workflow/util.py
+++ b/src/pytest_workflow/util.py
@@ -1,4 +1,5 @@
 import functools
+import gzip
 import hashlib
 import os
 import re
@@ -7,7 +8,7 @@
 import sys
 import warnings
 from pathlib import Path
-from typing import Callable, Iterator, List, Optional, Set, Tuple, Union
+from typing import BinaryIO, Callable, Iterator, List, Optional, Set, Tuple, Union
 
 Filepath = Union[str, os.PathLike]
 
@@ -204,10 +205,32 @@ def file_md5sum(filepath: Path, block_size=64 * 1024) -> str:
     :param block_size: Block size in bytes
     :return: a md5sum as hexadecimal string.
     """
-    hasher = hashlib.md5()
     with filepath.open('rb') as file_handler:  # Read the file in bytes
-        for block in iter(lambda: file_handler.read(block_size), b''):
-            hasher.update(block)
+        return file_handle_md5sum(file_handler, block_size)
+
+
+def gzip_md5sum(filepath: Path, block_size=64 * 1024) -> str:
+    """
+    Generates a md5sum for the uncompressed contents of gzipped file.
+    Reads file in blocks to save memory.
+    :param filepath: a pathlib. Path to the gzipped file
+    :param block_size: Block size in bytes
+    :return: a md5sum as hexadecimal string.
+    """
+    with gzip.open(filepath) as file_handler:  # Read the file in bytes
+        return file_handle_md5sum(file_handler, block_size)
+
+
+def file_handle_md5sum(file_handler: BinaryIO, block_size) -> str:
+    """
+    Generates a md5sum for a file handle. Reads file in blocks to save memory.
+    :param file_handler: a readable binary file handler
+    :param block_size: Block size in bytes
+    :return: a md5sum as hexadecimal string.
+    """
+    hasher = hashlib.md5()
+    for block in iter(lambda: file_handler.read(block_size), b''):
+        hasher.update(block)
     return hasher.hexdigest()
 
 
diff --git a/tests/test_schema.py b/tests/test_schema.py
index 378be288..98edfc95 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -171,6 +171,7 @@ def test_filetest_defaults():
     assert file_test.contains_regex == []
     assert file_test.must_not_contain_regex == []
     assert file_test.md5sum is None
+    assert file_test.ungzip_md5sum is None
     assert file_test.should_exist
 
 
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 45f789c3..b2f9f376 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -13,6 +13,7 @@
 #
 # You should have received a copy of the GNU Affero General Public License
 # along with pytest-workflow.  If not, see <https://www.gnu.org/licenses/
+import gzip
 import hashlib
 import itertools
 import os
@@ -26,7 +27,7 @@
 
 from pytest_workflow.util import decode_unaligned, duplicate_tree, \
     file_md5sum, git_check_submodules_cloned, git_root, \
-    is_in_dir, link_tree, replace_whitespace
+    gzip_md5sum, is_in_dir, link_tree, replace_whitespace
 
 WHITESPACE_TESTS = [
     ("bla\nbla", "bla_bla"),
@@ -163,6 +164,14 @@ def test_file_md5sum(hash_file: Path):
     assert whole_file_md5 == per_line_md5
 
 
+def test_gzip_md5sum():
+    hash_file = HASH_FILE_DIR / "LICENSE.gz"
+    with gzip.open(hash_file, "rb") as contents_fh:
+        whole_file_md5 = hashlib.md5(contents_fh.read()).hexdigest()
+    per_line_md5 = gzip_md5sum(hash_file)
+    assert whole_file_md5 == per_line_md5
+
+
 def create_git_repo(path):
     dir = Path(path)
     os.mkdir(dir)

From b83cf3dba7c8f697c34bbaef79d664cc53b1e50d Mon Sep 17 00:00:00 2001
From: Will Holtz <wholtz@gmail.com>
Date: Wed, 21 Jun 2023 17:36:15 -0700
Subject: [PATCH 04/17] add ungzip_md5sum to schema.json

---
 src/pytest_workflow/schema/schema.json | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/pytest_workflow/schema/schema.json b/src/pytest_workflow/schema/schema.json
index 9ead66bb..82ec80c1 100644
--- a/src/pytest_workflow/schema/schema.json
+++ b/src/pytest_workflow/schema/schema.json
@@ -123,6 +123,10 @@
             "should_exist": {
               "type": "boolean"
             },
+            "ungzip_md5sum": {
+              "type": "string",
+              "pattern": "^[a-f0-9]{32}$"
+            },
             "contains": {
               "type": "array",
               "items": {

From 684f314de1b3da2234e53c9de923eb037652169d Mon Sep 17 00:00:00 2001
From: Will Holtz <wholtz@gmail.com>
Date: Wed, 21 Jun 2023 17:54:59 -0700
Subject: [PATCH 05/17] fix unzip_md5sum to ungzip_md5sum

---
 src/pytest_workflow/file_tests.py | 2 +-
 src/pytest_workflow/schema.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pytest_workflow/file_tests.py b/src/pytest_workflow/file_tests.py
index 5503154b..99030c79 100644
--- a/src/pytest_workflow/file_tests.py
+++ b/src/pytest_workflow/file_tests.py
@@ -137,7 +137,7 @@ def __init__(self, parent: pytest.Collector, filepath: Path,
         :param workflow: The workflow running to generate the file
         :param ungzip: Whether the file should be ungzipped before calculating
         """
-        name = "unzip_md5sum" if ungzip else "md5sum"
+        name = "ungzip_md5sum" if ungzip else "md5sum"
         super().__init__(name, parent)
         self.filepath = filepath
         self.expected_md5sum = md5sum
diff --git a/src/pytest_workflow/schema.py b/src/pytest_workflow/schema.py
index ed74b6c3..d4eada2f 100644
--- a/src/pytest_workflow/schema.py
+++ b/src/pytest_workflow/schema.py
@@ -136,7 +136,7 @@ def __init__(self, path: str, md5sum: Optional[str] = None,
         A container object
         :param path: the path to the file
         :param md5sum: md5sum of the file contents
-        :param unzip_md5sum: md5sum of the unzipped file contents
+        :param ungzip_md5sum: md5sum of the unzipped file contents
         :param should_exist: whether the file should exist or not
         :param contains: a list of strings that should be present in the file
         :param must_not_contain: a list of strings that should not be present

From a22950cf58a531d7db6a5cc29d8d032f0634f98f Mon Sep 17 00:00:00 2001
From: Will Holtz <wholtz@gmail.com>
Date: Wed, 21 Jun 2023 18:16:28 -0700
Subject: [PATCH 06/17] fix typing for gzip file handles

---
 src/pytest_workflow/util.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/pytest_workflow/util.py b/src/pytest_workflow/util.py
index a91c9b5c..491033d0 100644
--- a/src/pytest_workflow/util.py
+++ b/src/pytest_workflow/util.py
@@ -8,7 +8,8 @@
 import sys
 import warnings
 from pathlib import Path
-from typing import BinaryIO, Callable, Iterator, List, Optional, Set, Tuple, Union
+from typing import Callable, IO, Iterator, List, Optional, Set, Tuple, Union, \
+                   cast
 
 Filepath = Union[str, os.PathLike]
 
@@ -218,10 +219,10 @@ def gzip_md5sum(filepath: Path, block_size=64 * 1024) -> str:
     :return: a md5sum as hexadecimal string.
     """
     with gzip.open(filepath) as file_handler:  # Read the file in bytes
-        return file_handle_md5sum(file_handler, block_size)
+        return file_handle_md5sum(cast(IO[bytes], file_handler), block_size)
 
 
-def file_handle_md5sum(file_handler: BinaryIO, block_size) -> str:
+def file_handle_md5sum(file_handler: IO[bytes], block_size) -> str:
     """
     Generates a md5sum for a file handle. Reads file in blocks to save memory.
     :param file_handler: a readable binary file handler

From 2f583bf633750556f65fd89242f1e077382d4632 Mon Sep 17 00:00:00 2001
From: Will Holtz <wholtz@gmail.com>
Date: Fri, 23 Jun 2023 12:52:41 -0700
Subject: [PATCH 07/17] move to extract_md5sum using xopen

---
 HISTORY.rst                            |  6 +++---
 docs/writing_tests.rst                 |  8 +++++++-
 requirements.txt                       |  4 +++-
 setup.py                               |  4 +++-
 src/pytest_workflow/file_tests.py      | 22 +++++++++++-----------
 src/pytest_workflow/schema.py          |  6 +++---
 src/pytest_workflow/schema/schema.json |  2 +-
 src/pytest_workflow/util.py            | 11 ++++++-----
 tests/test_schema.py                   |  2 +-
 tests/test_utils.py                    |  8 ++++----
 10 files changed, 42 insertions(+), 31 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index 8b2b8ef0..455177f1 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -9,9 +9,9 @@ Changelog
 
 version 2.1.0-dev
 ---------------------------
-+ Add md5sum checking on unzipped contents of gzipped output files. Gzipped
-  files contain a timestamp which makes it hard to directly compare the md5sums
-  of gzipped files.
++ Add extract_md5sum check on uncompressed contents of compressed output files.
+  Gzipped files contain a timestamp which makes it hard to directly compare the
+  md5sums of gzipped files.
 + Document naming conventions for Python test discovery
 
 version 2.0.1
diff --git a/docs/writing_tests.rst b/docs/writing_tests.rst
index 0e11a7d7..0a3896d1 100644
--- a/docs/writing_tests.rst
+++ b/docs/writing_tests.rst
@@ -64,7 +64,7 @@ Test options
       - path: "TomCruise.txt.gz"       # Gzipped files can also be searched, provided their extension is '.gz'
         contains:
           - "starring"
-        ungzip_md5sum: e27c52f6b5f8152aa3ef58be7bdacc4d   # Md5sum of the ungzipped file (optional)
+        extract_md5sum: e27c52f6b5f8152aa3ef58be7bdacc4d   # Md5sum of the uncompressed file (optional)
     stderr:                            # Options for testing stderr (optional)
       contains:                        # A list of strings which should be in stderr (optional)
         - "BSOD error, please contact the IT crowd"
@@ -90,6 +90,12 @@ Please see the `Python documentation on regular expressions
 <https://docs.python.org/3/library/re.html>`_ to see how Python handles escape
 sequences.
 
+The ``extract_md5sum`` option is used to uncompress a file and then compare
+the md5sum of the uncompressed file with the supplied md5sum. This option is
+particularly useful when testing gzipped files, which may contain a file
+creation timestamp in the gzip header. The supported compressed file
+formats for this option are gzip, bzip2, xz and Zstandard.
+
 .. note::
     Workflow names must be unique. Pytest workflow will crash when multiple
     workflows have the same name, even if they are in different files.
diff --git a/requirements.txt b/requirements.txt
index 2c5d3bff..884f5ec3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
 pyyaml
 pytest>=7.0.0
-jsonschema
\ No newline at end of file
+jsonschema
+xopen>=1.7.0
+zstandard
diff --git a/setup.py b/setup.py
index d4642b64..e4de914f 100644
--- a/setup.py
+++ b/setup.py
@@ -54,7 +54,9 @@
     install_requires=[
         "pytest>=7.0.0",  # To use pathlib Path's in pytest
         "pyyaml",
-        "jsonschema"
+        "jsonschema",
+        "xopen>=1.4.0",
+        "zstandard",
     ],
     # This line makes sure the plugin is automatically loaded when it is
     # installed in the same environment as pytest. No need to configure
diff --git a/src/pytest_workflow/file_tests.py b/src/pytest_workflow/file_tests.py
index 99030c79..1f98b1c2 100644
--- a/src/pytest_workflow/file_tests.py
+++ b/src/pytest_workflow/file_tests.py
@@ -22,7 +22,7 @@
 
 from .content_tests import ContentTestCollector
 from .schema import FileTest
-from .util import file_md5sum, gzip_md5sum
+from .util import extract_md5sum, file_md5sum
 from .workflow import Workflow
 
 
@@ -77,15 +77,15 @@ def collect(self):
                 filepath=filepath,
                 md5sum=self.filetest.md5sum,
                 workflow=self.workflow,
-                ungzip=False)]
+                extract=False)]
 
-        if self.filetest.ungzip_md5sum:
+        if self.filetest.extract_md5sum:
             tests += [FileMd5.from_parent(
                 parent=self,
                 filepath=filepath,
-                md5sum=self.filetest.ungzip_md5sum,
+                md5sum=self.filetest.extract_md5sum,
                 workflow=self.workflow,
-                ungzip=True)]
+                extract=True)]
 
         return tests
 
@@ -128,22 +128,22 @@ def repr_failure(self, excinfo, style=None):
 
 class FileMd5(pytest.Item):
     def __init__(self, parent: pytest.Collector, filepath: Path,
-                 md5sum: str, workflow: Workflow, ungzip: bool):
+                 md5sum: str, workflow: Workflow, extract: bool):
         """
         Create a tests for the file md5sum.
         :param parent: The collector that started this item
         :param filepath: The path to the file
         :param md5sum:  The expected md5sum
         :param workflow: The workflow running to generate the file
-        :param ungzip: Whether the file should be ungzipped before calculating
+        :param extract: Whether the file should be extracted before calculating
         """
-        name = "ungzip_md5sum" if ungzip else "md5sum"
+        name = "extract_md5sum" if extract else "md5sum"
         super().__init__(name, parent)
         self.filepath = filepath
         self.expected_md5sum = md5sum
         self.observed_md5sum = None
         self.workflow = workflow
-        self.ungzip = ungzip
+        self.extract = extract
 
     def runtest(self):
         # Wait for the workflow to finish before we check the md5sum of a file.
@@ -151,12 +151,12 @@ def runtest(self):
         if not self.workflow.matching_exitcode():
             pytest.skip(f"'{self.parent.workflow.name}' did not exit with"
                         f"desired exit code.")
-        sum_func = gzip_md5sum if self.ungzip else file_md5sum
+        sum_func = extract_md5sum if self.extract else file_md5sum
         self.observed_md5sum = sum_func(self.filepath)
         assert self.observed_md5sum == self.expected_md5sum
 
     def repr_failure(self, excinfo, style=None):
-        metric = "ungzip_md5sum" if self.ungzip else "md5sum"
+        metric = "extract_md5sum" if self.extract else "md5sum"
         return (
             f"Observed {metric} '{self.observed_md5sum}' not equal to "
             f"expected {metric} '{self.expected_md5sum}' for file "
diff --git a/src/pytest_workflow/schema.py b/src/pytest_workflow/schema.py
index d4eada2f..499dd416 100644
--- a/src/pytest_workflow/schema.py
+++ b/src/pytest_workflow/schema.py
@@ -125,7 +125,7 @@ def __init__(self, contains: Optional[List[str]] = None,
 class FileTest(ContentTest):
     """A class that contains all the properties of a to be tested file."""
     def __init__(self, path: str, md5sum: Optional[str] = None,
-                 ungzip_md5sum: Optional[str] = None,
+                 extract_md5sum: Optional[str] = None,
                  should_exist: bool = DEFAULT_FILE_SHOULD_EXIST,
                  contains: Optional[List[str]] = None,
                  must_not_contain: Optional[List[str]] = None,
@@ -136,7 +136,7 @@ def __init__(self, path: str, md5sum: Optional[str] = None,
         A container object
         :param path: the path to the file
         :param md5sum: md5sum of the file contents
-        :param ungzip_md5sum: md5sum of the unzipped file contents
+        :param extract_md5sum: md5sum of the extracted file contents
         :param should_exist: whether the file should exist or not
         :param contains: a list of strings that should be present in the file
         :param must_not_contain: a list of strings that should not be present
@@ -152,7 +152,7 @@ def __init__(self, path: str, md5sum: Optional[str] = None,
                          encoding=encoding)
         self.path = Path(path)
         self.md5sum = md5sum
-        self.ungzip_md5sum = ungzip_md5sum
+        self.extract_md5sum = extract_md5sum
         self.should_exist = should_exist
 
 
diff --git a/src/pytest_workflow/schema/schema.json b/src/pytest_workflow/schema/schema.json
index 82ec80c1..718b6c25 100644
--- a/src/pytest_workflow/schema/schema.json
+++ b/src/pytest_workflow/schema/schema.json
@@ -123,7 +123,7 @@
             "should_exist": {
               "type": "boolean"
             },
-            "ungzip_md5sum": {
+            "extract_md5sum": {
               "type": "string",
               "pattern": "^[a-f0-9]{32}$"
             },
diff --git a/src/pytest_workflow/util.py b/src/pytest_workflow/util.py
index 491033d0..beed52af 100644
--- a/src/pytest_workflow/util.py
+++ b/src/pytest_workflow/util.py
@@ -1,5 +1,4 @@
 import functools
-import gzip
 import hashlib
 import os
 import re
@@ -11,6 +10,8 @@
 from typing import Callable, IO, Iterator, List, Optional, Set, Tuple, Union, \
                    cast
 
+from xopen import xopen
+
 Filepath = Union[str, os.PathLike]
 
 
@@ -210,15 +211,15 @@ def file_md5sum(filepath: Path, block_size=64 * 1024) -> str:
         return file_handle_md5sum(file_handler, block_size)
 
 
-def gzip_md5sum(filepath: Path, block_size=64 * 1024) -> str:
+def extract_md5sum(filepath: Path, block_size=64 * 1024) -> str:
     """
-    Generates a md5sum for the uncompressed contents of gzipped file.
+    Generates a md5sum for the uncompressed contents of compressed file.
     Reads file in blocks to save memory.
-    :param filepath: a pathlib. Path to the gzipped file
+    :param filepath: a pathlib. Path to the compressed file
     :param block_size: Block size in bytes
     :return: a md5sum as hexadecimal string.
     """
-    with gzip.open(filepath) as file_handler:  # Read the file in bytes
+    with xopen(filepath, 'rb') as file_handler:  # Read the file in bytes
         return file_handle_md5sum(cast(IO[bytes], file_handler), block_size)
 
 
diff --git a/tests/test_schema.py b/tests/test_schema.py
index 98edfc95..8defda21 100644
--- a/tests/test_schema.py
+++ b/tests/test_schema.py
@@ -171,7 +171,7 @@ def test_filetest_defaults():
     assert file_test.contains_regex == []
     assert file_test.must_not_contain_regex == []
     assert file_test.md5sum is None
-    assert file_test.ungzip_md5sum is None
+    assert file_test.extract_md5sum is None
     assert file_test.should_exist
 
 
diff --git a/tests/test_utils.py b/tests/test_utils.py
index b2f9f376..574225bd 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -26,8 +26,8 @@
 import pytest
 
 from pytest_workflow.util import decode_unaligned, duplicate_tree, \
-    file_md5sum, git_check_submodules_cloned, git_root, \
-    gzip_md5sum, is_in_dir, link_tree, replace_whitespace
+    extract_md5sum, file_md5sum, git_check_submodules_cloned, git_root, \
+    is_in_dir, link_tree, replace_whitespace
 
 WHITESPACE_TESTS = [
     ("bla\nbla", "bla_bla"),
@@ -164,11 +164,11 @@ def test_file_md5sum(hash_file: Path):
     assert whole_file_md5 == per_line_md5
 
 
-def test_gzip_md5sum():
+def test_extract_md5sum():
     hash_file = HASH_FILE_DIR / "LICENSE.gz"
     with gzip.open(hash_file, "rb") as contents_fh:
         whole_file_md5 = hashlib.md5(contents_fh.read()).hexdigest()
-    per_line_md5 = gzip_md5sum(hash_file)
+    per_line_md5 = extract_md5sum(hash_file)
     assert whole_file_md5 == per_line_md5
 
 

From dd69ee7153313631b09ebf178eedddf537820cfe Mon Sep 17 00:00:00 2001
From: Ruben Vorderman <r.h.p.vorderman@lumc.nl>
Date: Mon, 26 Jun 2023 08:51:27 +0200
Subject: [PATCH 08/17] Also include extract_md5sum keyword in the README

---
 README.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.rst b/README.rst
index c53eb866..3e19015c 100644
--- a/README.rst
+++ b/README.rst
@@ -127,6 +127,7 @@ predefined tests as well as custom tests are possible.
       - path: "TomCruise.txt.gz"       # Gzipped files can also be searched, provided their extension is '.gz'
         contains:
           - "starring"
+        extract_md5sum: e27c52f6b5f8152aa3ef58be7bdacc4d   # Md5sum of the uncompressed file (optional)
     stderr:                            # Options for testing stderr (optional)
       contains:                        # A list of strings which should be in stderr (optional)
         - "BSOD error, please contact the IT crowd"

From a76d1a5bb2aa41d1f3ed10578a148c17bb44d530 Mon Sep 17 00:00:00 2001
From: Jorge Alvarez Jarreta <jalvarez@ebi.ac.uk>
Date: Mon, 4 Mar 2024 14:23:28 +0000
Subject: [PATCH 09/17] make pytest_collect_file() compatible with pytest 8.1+

---
 HISTORY.rst                   | 6 ++++++
 src/pytest_workflow/plugin.py | 4 ++--
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index d05d7d92..97ef39f7 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -2,6 +2,12 @@
 Changelog
 ==========
 
+version 2.0.2
+---------------------------
++ Fixed a bug where pytest 8.1+ would raise a ``PluginValidationError`` because
+  the hook ``pytest_collect_file()`` has finally dropped the deprecated
+  argument ``path`` from its specification.
+
 .. Newest changes should be on top.
 
 .. This document is user facing. Please word the changes in such a way
diff --git a/src/pytest_workflow/plugin.py b/src/pytest_workflow/plugin.py
index 1e012d38..fe90fe7f 100644
--- a/src/pytest_workflow/plugin.py
+++ b/src/pytest_workflow/plugin.py
@@ -117,11 +117,11 @@ def addoption(self, *args, **kwargs):
     return parser
 
 
-def pytest_collect_file(file_path, path, parent):
+def pytest_collect_file(file_path, parent):
     """Collection hook
     This collects the yaml files that start with "test" and end with
     .yaml or .yml"""
-    if path.ext in [".yml", ".yaml"] and path.basename.startswith("test"):
+    if file_path.suffix in [".yml", ".yaml"] and file_path.name.startswith("test"):
         return YamlFile.from_parent(parent, path=file_path)
     return None
 

From b5fa0fac30a79e890e8a481d6365306961d3ab4e Mon Sep 17 00:00:00 2001
From: Ruben Vorderman <r.h.p.vorderman@lumc.nl>
Date: Mon, 4 Mar 2024 15:50:17 +0100
Subject: [PATCH 10/17] Add 3.12 python support, drop 3.7 support

---
 .github/workflows/ci.yml |  6 +++---
 HISTORY.rst              | 10 +++++-----
 setup.py                 |  2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 0b90645c..49b9f3f4 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -14,7 +14,7 @@ jobs:
     strategy:
       matrix:
         python-version:
-          - "3.7"
+          - "3.8"
     steps:
       - uses: actions/checkout@v2.3.4
       - name: Set up Python ${{ matrix.python-version }}
@@ -42,11 +42,11 @@ jobs:
     strategy:
       matrix:
         python-version:
-          - "3.7"
           - "3.8"
           - "3.9"
           - "3.10"
           - "3.11"
+          - "3.12"
     steps:
       - uses: actions/checkout@v2.3.4
       - name: Set up Python ${{ matrix.python-version }}
@@ -64,7 +64,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.7"]
+        python-version: ["3.8"]
         test-program: [snakemake, miniwdl]
     steps:
       - uses: actions/checkout@v2.3.4
diff --git a/HISTORY.rst b/HISTORY.rst
index 3671b423..e0a8ee69 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -2,11 +2,6 @@
 Changelog
 ==========
 
-version 2.0.2
----------------------------
-+ Fixed a bug where pytest 8.1+ would raise a ``PluginValidationError`` because
-  the hook ``pytest_collect_file()`` has finally dropped the deprecated
-  argument ``path`` from its specification.
 
 .. Newest changes should be on top.
 
@@ -15,6 +10,11 @@ version 2.0.2
 
 version 2.1.0-dev
 ---------------------------
++ Python version 3.7 support is dropped because it is deprecated. Python
+  version 3.12 was added.
++ Fixed a bug where pytest 8.1+ would raise a ``PluginValidationError`` because
+  the hook ``pytest_collect_file()`` has finally dropped the deprecated
+  argument ``path`` from its specification.
 + Add extract_md5sum check on uncompressed contents of compressed output files.
   Gzipped files contain a timestamp which makes it hard to directly compare the
   md5sums of gzipped files.
diff --git a/setup.py b/setup.py
index e4de914f..f36faf0d 100644
--- a/setup.py
+++ b/setup.py
@@ -39,11 +39,11 @@
     classifiers=[
         "Programming Language :: Python :: 3 :: Only",
         "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
         "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3.12",
         "Development Status :: 5 - Production/Stable",
         "License :: OSI Approved :: "
         "GNU Affero General Public License v3 or later (AGPLv3+)",

From 87a6eeba09ffa70da77cc5aa238c4178a376c726 Mon Sep 17 00:00:00 2001
From: Ruben Vorderman <r.h.p.vorderman@lumc.nl>
Date: Mon, 4 Mar 2024 15:53:56 +0100
Subject: [PATCH 11/17] Fix line length error

---
 src/pytest_workflow/plugin.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/pytest_workflow/plugin.py b/src/pytest_workflow/plugin.py
index fe90fe7f..142c715b 100644
--- a/src/pytest_workflow/plugin.py
+++ b/src/pytest_workflow/plugin.py
@@ -121,7 +121,8 @@ def pytest_collect_file(file_path, parent):
     """Collection hook
     This collects the yaml files that start with "test" and end with
     .yaml or .yml"""
-    if file_path.suffix in [".yml", ".yaml"] and file_path.name.startswith("test"):
+    if (file_path.suffix in [".yml", ".yaml"] and
+            file_path.name.startswith("test")):
         return YamlFile.from_parent(parent, path=file_path)
     return None
 

From ae51d3b759bcd71418b54e071513f11a116b14a1 Mon Sep 17 00:00:00 2001
From: Ruben Vorderman <r.h.p.vorderman@lumc.nl>
Date: Mon, 4 Mar 2024 16:07:56 +0100
Subject: [PATCH 12/17] Fix the unstable api of snakemake to work again

Remove the `-r` command line flag from the snakemake test commands and expect
the error output on stderr again instead of stdout.
---
 tests/functional/simple_snakefile_test_cases.yml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tests/functional/simple_snakefile_test_cases.yml b/tests/functional/simple_snakefile_test_cases.yml
index 0ca2f666..c110672e 100644
--- a/tests/functional/simple_snakefile_test_cases.yml
+++ b/tests/functional/simple_snakefile_test_cases.yml
@@ -1,26 +1,26 @@
 - name: test-dry-run
-  command: snakemake -n -r -p -s SimpleSnakefile --config N_LINES_TO_READ=1
+  command: snakemake -n -p -s SimpleSnakefile --config N_LINES_TO_READ=1
 - name: test-config-missing
-  command: snakemake -n -r -p -s SimpleSnakefile
+  command: snakemake -n -p -s SimpleSnakefile
   exit_code: 1
-  stdout:
+  stderr:
     contains:
       - "You must set --config N_LINES_TO_READ=<a value>."
 - name: test-config-wrong-type
-  command: snakemake -n -r -p -s SimpleSnakefile --config N_LINES_TO_READ=one
+  command: snakemake -n -p -s SimpleSnakefile --config N_LINES_TO_READ=one
   exit_code: 1
-  stdout:
+  stderr:
     contains:
       - "N_LINES_TO_READ must be an integer."
 - name: test-config-invalid-value
-  command: snakemake -n -r -p -s SimpleSnakefile --config N_LINES_TO_READ=-1
+  command: snakemake -n -p -s SimpleSnakefile --config N_LINES_TO_READ=-1
   exit_code: 1
-  stdout:
+  stderr:
     contains:
       - "N_LINES_TO_READ must at least be 1."
 - name: test-snakemake-run
   command: >-
-    snakemake --cores 1 -r -p -s SimpleSnakefile --config N_LINES_TO_READ=500
+    snakemake --cores 1 -p -s SimpleSnakefile --config N_LINES_TO_READ=500
   files:
     - path: rand/0.txt
     - path: rand/1.txt

From 71f621b377278624de16e0ce077b2a2956ede160 Mon Sep 17 00:00:00 2001
From: Ruben Vorderman <r.h.p.vorderman@lumc.nl>
Date: Mon, 4 Mar 2024 16:16:31 +0100
Subject: [PATCH 13/17] Require a higher python version in python_requires

---
 setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index f36faf0d..6d46308e 100644
--- a/setup.py
+++ b/setup.py
@@ -49,8 +49,8 @@
         "GNU Affero General Public License v3 or later (AGPLv3+)",
         "Framework :: Pytest",
     ],
-    # Because we cannot test anymore on Python 3.6.
-    python_requires=">=3.7",
+    # Because we cannot test anymore on Python 3.7.
+    python_requires=">=3.8",
     install_requires=[
         "pytest>=7.0.0",  # To use pathlib Path's in pytest
         "pyyaml",

From 40857b2a96f3330a5378cd4acf22d276d0a4f902 Mon Sep 17 00:00:00 2001
From: Ruben Vorderman <r.h.p.vorderman@lumc.nl>
Date: Mon, 4 Mar 2024 16:16:48 +0100
Subject: [PATCH 14/17] Test snakemake on required 3.11

---
 .github/workflows/ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 49b9f3f4..d0998f27 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -64,7 +64,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.8"]
+        python-version: ["3.11"]
         test-program: [snakemake, miniwdl]
     steps:
       - uses: actions/checkout@v2.3.4

From 9b551af0f745d5ce8fe969a061cbfc68ed22225f Mon Sep 17 00:00:00 2001
From: A U Thor <author@example.com>
Date: Tue, 5 Mar 2024 17:04:18 +0100
Subject: [PATCH 15/17] Add readthedocs configuration

---
 .readthedocs.yml | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 .readthedocs.yml

diff --git a/.readthedocs.yml b/.readthedocs.yml
new file mode 100644
index 00000000..69f1d937
--- /dev/null
+++ b/.readthedocs.yml
@@ -0,0 +1,13 @@
+version: 2
+
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.11"
+
+sphinx:
+  configuration: docs/conf.py
+
+python:
+  install:
+    - requirements: requirements-docs.txt

From cd8465fc6b9dcf4a00d87e85e6e7d175b03ef7f6 Mon Sep 17 00:00:00 2001
From: Ruben Vorderman <r.h.p.vorderman@lumc.nl>
Date: Mon, 18 Mar 2024 15:08:42 +0100
Subject: [PATCH 16/17] Set stable version

---
 HISTORY.rst | 2 +-
 setup.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/HISTORY.rst b/HISTORY.rst
index e0a8ee69..e1f5c312 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -8,7 +8,7 @@ Changelog
 .. This document is user facing. Please word the changes in such a way
 .. that users understand how the changes affect the new version.
 
-version 2.1.0-dev
+version 2.1.0
 ---------------------------
 + Python version 3.7 support is dropped because it is deprecated. Python
   version 3.12 was added.
diff --git a/setup.py b/setup.py
index 6d46308e..fcf4306e 100644
--- a/setup.py
+++ b/setup.py
@@ -20,7 +20,7 @@
 
 setup(
     name="pytest-workflow",
-    version="2.1.0-dev",
+    version="2.1.0",
     description="A pytest plugin for configuring workflow/pipeline tests "
                 "using YAML files",
     author="Leiden University Medical Center",

From 77251bb89c1f2f325da8898c96952d5c1652bdda Mon Sep 17 00:00:00 2001
From: Ruben Vorderman <r.h.p.vorderman@lumc.nl>
Date: Mon, 18 Mar 2024 15:23:40 +0100
Subject: [PATCH 17/17] Fix snakemake functional tests

---
 tests/functional/simple_snakefile_test_cases.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/functional/simple_snakefile_test_cases.yml b/tests/functional/simple_snakefile_test_cases.yml
index c110672e..9ec26e21 100644
--- a/tests/functional/simple_snakefile_test_cases.yml
+++ b/tests/functional/simple_snakefile_test_cases.yml
@@ -3,19 +3,19 @@
 - name: test-config-missing
   command: snakemake -n -p -s SimpleSnakefile
   exit_code: 1
-  stderr:
+  stdout:  # Dry run output should be stdout. See https://github.com/snakemake/snakemake/issues/2757
     contains:
       - "You must set --config N_LINES_TO_READ=<a value>."
 - name: test-config-wrong-type
   command: snakemake -n -p -s SimpleSnakefile --config N_LINES_TO_READ=one
   exit_code: 1
-  stderr:
+  stdout:
     contains:
       - "N_LINES_TO_READ must be an integer."
 - name: test-config-invalid-value
   command: snakemake -n -p -s SimpleSnakefile --config N_LINES_TO_READ=-1
   exit_code: 1
-  stderr:
+  stdout:
     contains:
       - "N_LINES_TO_READ must at least be 1."
 - name: test-snakemake-run