Source code for tardisbase.testing.regression_data.regression_data

import os
import re
from pathlib import Path

import numpy as np
import pandas as pd
import pytest
from _pytest.outcomes import OutcomeException

from tardisbase.testing.regression_data.hdfwriter import HDFWriterMixin



[docs]
class RegressionData:
    def __init__(self, request) -> None:
        self.request = request
        if request.config.getoption("--tardis-regression-data") is None:
            pytest.skip("--tardis-regression-data was not specified")
        regression_data_path = Path(
            request.config.getoption("--tardis-regression-data")
        )
        self.regression_data_path = Path(
            os.path.expandvars(regression_data_path.expanduser())
        )
        self.enable_generate_reference = request.config.getoption(
            "--generate-reference"
        )
        self.fname = f"{self.fname_prefix}.UNKNOWN_FORMAT"
        self.hdf_store_object = None

    @property
    def module_name(self):
        return self.request.node.module.__name__

    @property
    def test_name(self):
        return self.request.node.name

    @property
    def fname_prefix(self):
        double_under = re.compile(r"[:\[\]{}]")
        no_space = re.compile(r'[,"\']')  # quotes and commas

        name = double_under.sub("__", self.test_name)
        name = no_space.sub("", name)
        return name

    @property
    def relative_regression_data_dir(self):
        relative_data_dir = Path(self.module_name.replace(".", "/"))
        if self.request.cls is not None:
            relative_data_dir /= HDFWriterMixin.convert_to_snake_case(
                self.request.cls.__name__
            )
        return relative_data_dir

    @property
    def absolute_regression_data_dir(self):
        return self.regression_data_path / self.relative_regression_data_dir

    @property
    def fpath(self):
        return self.absolute_regression_data_dir / self.fname


[docs]
    def sync_dataframe(self, data, key="data"):
        """
        Synchronizes the dataframe with the regression data.

        Parameters
        ----------
        data : DataFrame
            The dataframe to be synchronized.
        key : str, optional
            The key to use for storing the dataframe in the regression data file. Defaults to "data".

        Returns
        -------
        DataFrame or None
            The synchronized dataframe if `enable_generate_reference` is `False`, otherwise `None`.
        """
        self.fname = f"{self.fname_prefix}.h5"
        if self.enable_generate_reference:
            self.fpath.parent.mkdir(parents=True, exist_ok=True)
            data.to_hdf(
                self.fpath,
                key=key,
            )
            write_status()
        else:
            return pd.read_hdf(self.fpath, key=key)



[docs]
    def sync_ndarray(self, data):
        """
        Synchronizes the ndarray with the regression data.

        Parameters
        ----------
        data : ndarray
            The ndarray to be synchronized.

        Returns
        -------
        ndarray or None
            The synchronized ndarray if `enable_generate_reference` is `False`, otherwise `None`.
        """
        self.fname = f"{self.fname_prefix}.npy"
        if self.enable_generate_reference:
            self.fpath.parent.mkdir(parents=True, exist_ok=True)
            np.save(self.fpath, data)
            write_status()
        else:
            return np.load(self.fpath)



[docs]
    def sync_str(self, data):
        """
        Synchronizes the string with the regression data.

        Parameters
        ----------
        data : str
            The string to be synchronized.

        Returns
        -------
        str or None
            The synchronized string if `enable_generate_reference` is `False`, otherwise `None`.
        """
        self.fname = f"{self.fname_prefix}.txt"
        if self.enable_generate_reference:
            self.fpath.parent.mkdir(parents=True, exist_ok=True)
            with self.fpath.open("w") as fh:
                fh.write(data)
            write_status()
        else:
            with self.fpath.open("r") as fh:
                return fh.read()



[docs]
    def sync_hdf_store(self, tardis_module, update_fname=True):
        """
        Synchronizes the HDF store with the regression data.

        Parameters
        ----------
        tardis_module : object
            The module to be synchronized.
        update_fname : bool, optional
            Whether to update the file name. Defaults to True.

        Returns
        -------
        HDFStore or None
            The synchronized HDF store if `enable_generate_reference` is `False`, otherwise `None`.
        """
        if update_fname:
            self.fname = f"{self.fname_prefix}.h5"
        if self.enable_generate_reference:
            self.fpath.parent.mkdir(parents=True, exist_ok=True)
            with pd.HDFStore(self.fpath, mode="w") as store:
                tardis_module.to_hdf(store, overwrite=True)
            write_status()
        else:
            # since each test function has its own regression data instance
            # each test function will only have one HDFStore object
            self.hdf_store_object = pd.HDFStore(self.fpath, mode="r")
            return self.hdf_store_object





[docs]
@pytest.fixture(scope="function")
def regression_data(request):
    regression_data_instance = RegressionData(request)
    yield regression_data_instance
    if (
        regression_data_instance.hdf_store_object is not None
        and regression_data_instance.hdf_store_object.is_open
    ):
        regression_data_instance.hdf_store_object.close()




[docs]
class PlotDataHDF(HDFWriterMixin):
    """
    A class that writes plot data to HDF5 format using the HDFWriterMixin.
    """

    def __init__(self, **kwargs):
        """
        Initializes PlotDataHDF with arbitrary keyword arguments,
        storing them as attributes and adding their names to hdf_properties.

        Parameters:
        -----------
        **kwargs: Arbitrary keyword arguments representing properties to save.
        """
        self.hdf_properties = []
        for key, value in kwargs.items():
            setattr(self, key, value)
            self.hdf_properties.append(key)




[docs]
class TestWrite(OutcomeException):
    pass




[docs]
def write_status():
    raise TestWrite(msg="Writing regression data for test.")




[docs]
class PytestWritingPlugin:

[docs]
    def pytest_runtest_makereport(self, item, call):
        """
        Custom pytest hook to handle test report generation for regression data writing.

        This hook intercepts test execution and creates a custom test report when
        a TestWrite exception is encountered, marking the test as "written" rather
        than failed.

        Parameters
        ----------
        item : pytest.Item
            The test item being executed.
        call : pytest.CallInfo
            Information about the test call, including any exception information.

        Returns
        -------
        TestReport or None
            Returns a custom TestReport with outcome "written" if a TestWrite
            exception was raised, otherwise returns None to allow default
            report generation.

        Notes
        -----
        This hook is specifically designed for regression testing workflows where
        tests may write reference data instead of comparing against it. When a
        TestWrite exception is raised, it indicates successful data writing rather
        than a test failure.
        """
        if call.excinfo and isinstance(call.excinfo.value, TestWrite):
            from _pytest.reports import TestReport

            rep = TestReport(
                nodeid=item.nodeid,
                location=item.location,
                keywords=item.keywords,
                outcome="written",
                longrepr=None,
                when=call.when,
                sections=[],
            )
            rep.written = True
            return rep



[docs]
    def pytest_report_teststatus(self, report, config):
        """
        Custom pytest hook to report test status for regression data writing.

        This hook is called by pytest to determine the test outcome status and provides
        custom reporting for tests that have written regression data.

        Parameters
        ----------
        report : pytest.TestReport
            The test report object containing information about the test execution,
            including any custom attributes set during the test run.
        config : pytest.Config
            The pytest configuration object containing command-line options and
            configuration settings.

        Returns
        -------
        tuple of (str, str, str) or None
            If the test report has a 'written' attribute that is True, returns a tuple
            containing:
            - outcome: "regression data written" (test outcome description)
            - letter: "W" (single letter representation)
            - word: "WRITTEN" (word representation for verbose output)

            Returns None if the condition is not met, allowing other hooks or default
            behavior to determine the test status.

        Notes
        -----
        This hook is typically used in conjunction with pytest plugins that handle
        regression testing data, allowing tests to be marked as having successfully
        written reference data rather than just passing or failing.
        """
        if hasattr(report, "written") and report.written:
            return ("regression data written", "W", "WRITTEN")
Source code for tardisbase.testing.regression_data.regression_data

tardisbase

Navigation

Related Topics