From b03bb7a3176a6d4d5f7c8d181a0f94c9356c3560 Mon Sep 17 00:00:00 2001 From: Lucio Anderlini Date: Tue, 9 Sep 2025 19:48:05 +0200 Subject: [PATCH 01/15] Added polynominal features and cleaned-up boiler-plate for testing with fixtures. --- scikinC/PolynomialFeaturesConverter.py | 45 ++++++++++++++ scikinC/__init__.py | 3 +- scikinC/_tools.py | 3 +- test/test_PolynomialFeatures.py | 56 +++++++++++++++++ test/test_QuantileTransformer.py | 52 +++------------- test/testing_boilerplate.py | 84 ++++++++++++++++++++++++++ test/wrap.C | 2 +- test/wrap.py | 4 +- 8 files changed, 202 insertions(+), 47 deletions(-) create mode 100644 scikinC/PolynomialFeaturesConverter.py create mode 100644 test/test_PolynomialFeatures.py create mode 100644 test/testing_boilerplate.py diff --git a/scikinC/PolynomialFeaturesConverter.py b/scikinC/PolynomialFeaturesConverter.py new file mode 100644 index 0000000..59077ca --- /dev/null +++ b/scikinC/PolynomialFeaturesConverter.py @@ -0,0 +1,45 @@ +from sklearn.preprocessing import PolynomialFeatures +from textwrap import indent +import sys + +from scikinC import BaseConverter +from ._tools import array2c + +class PolynomialFeaturesConverter (BaseConverter): + + def convert (self, model: PolynomialFeatures, name = None): + lines = self.header() + + nOutputFeatures = model.n_output_features_ + nInputFeatures = model.n_features_in_ + order = model.order + include_bias = model.include_bias + + powers = [''] + for output_feature, features_in_powers in enumerate(model.powers_): + if (all([p == 0 for p in features_in_powers])): + powers.append("ret[%d] = 1;" % output_feature) + else: + powers.append( + ("ret[%d] = " % output_feature) + + ('*'.join(["input[%d]" % i_var for i_var, pow in enumerate(features_in_powers) for _ in range(pow)])) + + ";" + ) + + + print(powers, file=sys.stderr) + + lines.append ( """ + extern "C" + FLOAT_T* %(name)s (FLOAT_T* ret, const FLOAT_T *input) + { + %(powers)s; + return ret; + } + """ % dict ( + name=name, + powers=indent('\n'.join(powers), ' '*7) + ) + ) + + return '\n'.join(lines) diff --git a/scikinC/__init__.py b/scikinC/__init__.py index 2e3e83a..1027e3a 100644 --- a/scikinC/__init__.py +++ b/scikinC/__init__.py @@ -20,8 +20,9 @@ 'FastQuantileLayer': 'FastQuantileLayerConverter', 'FunctionTransformer': 'FunctionTransformerConverter', 'ColumnTransformer': 'ColumnTransformerConverter', + 'PolynomialFeatures': 'PolynomialFeaturesConverter', - ## Keras + ## Keras 'Sequential': 'KerasSequentialConverter', } diff --git a/scikinC/_tools.py b/scikinC/_tools.py index 3e11bd6..901a09a 100644 --- a/scikinC/_tools.py +++ b/scikinC/_tools.py @@ -24,7 +24,8 @@ def array2c (array, fmt = None): ################################################################################ def get_n_features (algo): if hasattr(algo, 'n_features'): return algo.n_features - elif hasattr(algo, 'n_features_'): return algo.n_features_ + elif hasattr(algo, 'n_features_'): return algo.n_features_ + elif hasattr(algo, 'n_output_features_'): return algo.n_output_features_ elif algo.__class__.__name__ == 'Sequential': return algo.layers[-1].kernel.shape[-1] elif algo.__class__.__name__ == 'DecorrTransformer': diff --git a/test/test_PolynomialFeatures.py b/test/test_PolynomialFeatures.py new file mode 100644 index 0000000..e9f3900 --- /dev/null +++ b/test/test_PolynomialFeatures.py @@ -0,0 +1,56 @@ +import numpy as np +from sklearn.preprocessing import PolynomialFeatures + +# PyTest testing infrastructure +import pytest + +# Local testing infrastructure +from wrap import deploy_pickle +from testing_boilerplate import fixtures + + +################################################################################ +## Test preparation +@fixtures.register() +def basic_pf(): + pf_ = PolynomialFeatures(degree=2) + X = np.arange(10)[None, :] + pf_.fit(X) + return pf_ + +@fixtures.register() +def large_pf(): + pf_ = PolynomialFeatures(degree=10) + X = np.arange(2)[None, :] + pf_.fit(X) + return pf_ + +@fixtures.register() +def no_bias_pf(): + pf_ = PolynomialFeatures(include_bias=False, degree=2) + X = np.arange(10)[None, :] + pf_.fit(X) + return pf_ + +@fixtures.register() +def fortran_order_pf(): + pf_ = PolynomialFeatures(order='F', degree=2) + X = np.arange(10)[None, :] + pf_.fit(X) + return pf_ + + +################################################################################ +## Real tests +@fixtures.test() +def test_forward(pf): + xtest = np.arange(pf.n_features_in_) + py = pf.transform(xtest[None]) + deployed = deploy_pickle("polynomialfeatures", pf) + c = deployed.transform(n_outputs=pf.n_output_features_, args=xtest) + + print(np.c_[py.ravel(), c.ravel()]) + + assert np.abs(py - c).max() < 1e-5, 'Result inconsistent with expectation' + + diff --git a/test/test_QuantileTransformer.py b/test/test_QuantileTransformer.py index 23564c6..77f9ea2 100644 --- a/test/test_QuantileTransformer.py +++ b/test/test_QuantileTransformer.py @@ -9,38 +9,39 @@ # Local testing infrastructure from wrap import deploy_pickle +from testing_boilerplate import fixtures ################################################################################ ## Test preparation -@pytest.fixture +@fixtures.register def scaler_uniform(): scaler_ = QuantileTransformer() X = np.random.uniform (20,30,(1000, 20)) scaler_.fit (X) return scaler_ -@pytest.fixture +@fixtures.register def scaler_normal(): scaler_ = QuantileTransformer(output_distribution='normal', n_quantiles=100) X = np.random.uniform (20,30,(1000, 20)) scaler_.fit (X) return scaler_ -@pytest.fixture +@fixtures.register def scaler_bool_uniform(): scaler_ = QuantileTransformer(output_distribution='uniform') X = np.random.choice ([22.,27.], (1000, 20), (0.8, 0.2)) scaler_.fit (X) return scaler_ -@pytest.fixture +@fixtures.register def scaler_bool_normal(): scaler_ = QuantileTransformer(output_distribution='normal') X = np.random.choice ([22.,27.], (1000, 20), (0.8, 0.2)) scaler_.fit (X) return scaler_ -@pytest.fixture +@fixtures.register def scaler_delta_normal(): scaler_ = QuantileTransformer(output_distribution='normal') X = np.full((10000,20), np.pi) @@ -52,44 +53,12 @@ def read_file(filename): with open(os.path.join(dir, "pathologies", filename), 'rb') as f: return pickle.load(f) -@pytest.fixture -def pathology_1(): - return read_file('column_with_quantile_1.pkl') - -@pytest.fixture -def pathology_2(): - return read_file('column_with_quantile_2.pkl') - -@pytest.fixture -def pathology_3(): - return read_file('column_with_quantile_3.pkl') - -@pytest.fixture -def pathology_4(): - return read_file('column_with_quantile_4.pkl') - - - - - -scalers = [ - 'scaler_uniform', - 'scaler_normal', - 'scaler_bool_uniform', - 'scaler_bool_normal', - 'scaler_delta_normal', - 'pathology_1', - 'pathology_2', - 'pathology_3', - 'pathology_4', - ] ################################################################################ ## Real tests -@pytest.mark.parametrize ('scaler', scalers) -def test_forward (scaler, request): - scaler = request.getfixturevalue(scaler) +@fixtures.test +def test_forward (scaler): n_features = scaler.n_features_in_ if hasattr(scaler, 'n_features_in_') else 20 deployed = deploy_pickle("quantiletransformer", scaler) @@ -105,10 +74,9 @@ def test_forward (scaler, request): array = np.array(results) print (array.T) -@pytest.mark.parametrize ('scaler', scalers) -def test_inverse (scaler, request): +@fixtures.test +def test_inverse (scaler): if hasattr(scaler, 'transform_inverse'): - scaler = request.getfixturevalue(scaler) deployed = deploy_pickle("quantiletransformer", scaler) xtest = np.random.uniform (0,1, 20) py = scaler.inverse_transform (xtest[None]) diff --git a/test/testing_boilerplate.py b/test/testing_boilerplate.py new file mode 100644 index 0000000..c7e0261 --- /dev/null +++ b/test/testing_boilerplate.py @@ -0,0 +1,84 @@ +""" +A minimalistic fixture registry for pytest that enables: +- Centralized tracking of fixtures per test module +- Automatic parametrization of tests over registered fixtures +- Cleaner test definitions with granular reporting + + +""" +import sys +import pytest + +class FixtureRegistry: + """ + A registry to track pytest fixtures per module and generate + parametrized tests automatically. + """ + + def __init__(self): + self._fixtures_per_module = dict() + + def register(self, test_category=None): + """ + Decorator to register a pytest fixture and store its name + under the calling module. + + Args: + f (function): The fixture function + + Returns: + function: The same function wrapped as a pytest fixture + """ + if test_category is None: + test_category = sys._getframe(1).f_globals["__name__"] + + def decorator(f): + fixture_name = f.__name__ + + # Initialize fixture set for the module if needed + self._fixtures_per_module.setdefault(test_category, set()) + + # Register the fixture name + self._fixtures_per_module[test_category].add(fixture_name) + + # Return the input function as a fixture + return pytest.fixture(f) + + return decorator + + + def test(self, test_category=None): + """ + Decorator to generate a parametrized test over all registered + fixtures in the calling module. Each fixture will be passed + to the test function individually. + + Args: + test_category: a string defining the set of tests to run. Defaults to callers' module name + + Returns: + function: A parametrized test function + """ + if test_category is None: + test_category = sys._getframe(1).f_globals["__name__"] + + def decorator(f): + fixture_names = self._fixtures_per_module.get(test_category, []) + + @pytest.mark.parametrize ('fixture_name', fixture_names) + def test_forward(fixture_name, request): + fixture_value = request.getfixturevalue(fixture_name) + return f(fixture_value) + + return test_forward + + return decorator + +# Singleton instance +fixtures = FixtureRegistry() + + + + + + diff --git a/test/wrap.C b/test/wrap.C index cc2e5e5..af72b59 100644 --- a/test/wrap.C +++ b/test/wrap.C @@ -24,7 +24,7 @@ int main (int argc, char *argv[]) const char* libname = argv[1]; const char* funcname = argv[2]; const int nY = atoi (argv[3]); - const size_t in0 = 4; + const size_t in0 = 4; // This is the number of fixed arguments, used later to compute nX const size_t nX = argc-in0; FLOAT_T iBuf[1064], oBuf[1064]; diff --git a/test/wrap.py b/test/wrap.py index b925544..743d472 100644 --- a/test/wrap.py +++ b/test/wrap.py @@ -45,11 +45,11 @@ def get_funcnames(self): return ret - def call_function(self, nArgs, args, funcname): + def call_function(self, n_outputs, args, funcname): path = os.path.dirname(os.path.realpath(__file__)) output = subprocess.check_output( - [path+"/wrap.exe", self.compiled, funcname, str(nArgs)] + + [path+"/wrap.exe", self.compiled, funcname, str(n_outputs)] + [str(x) for x in args] ) From 82c125b0fc29f87be5e8844422d88718eb81edfd Mon Sep 17 00:00:00 2001 From: Lucio Anderlini Date: Wed, 10 Sep 2025 11:24:46 +0200 Subject: [PATCH 02/15] Refreshed test infrastructure and got tests running with python 3.12 --- README.md | 16 +- scikinC/GBDTTraversalConverter.py | 55 ++--- scikinC/GBDTUnrollingConverter.py | 199 +++++++++--------- scikinC/ModelLoader.py | 4 + scikinC/_tools.py | 18 +- scikinC/layers/LeakyReLU.py | 2 +- ...ing_boilerplate.py => fixture_registry.py} | 20 +- test/test_ColumnTransformerConverter.py | 54 ++--- test/test_FastQuantileLayer.py | 60 ------ test/test_FunctionTransformer.py | 63 ++++++ test/test_FunctionTransformerConverter.py | 75 ------- test/test_GBDTC.py | 163 ++++++++------ test/test_Pipeline.py | 85 +++++--- test/test_PolynomialFeatures.py | 2 +- test/test_QuantileTransformer.py | 22 +- test/test_pathologies.py | 56 +++++ test/wrap.py | 2 +- 17 files changed, 469 insertions(+), 427 deletions(-) rename test/{testing_boilerplate.py => fixture_registry.py} (70%) delete mode 100644 test/test_FastQuantileLayer.py create mode 100644 test/test_FunctionTransformer.py delete mode 100644 test/test_FunctionTransformerConverter.py create mode 100644 test/test_pathologies.py diff --git a/README.md b/README.md index 4129c25..29cdb76 100644 --- a/README.md +++ b/README.md @@ -182,14 +182,14 @@ A few notes: ## Implemented converters #### Scikit-Learn preprocessing - | Model | Implementation | Test | Notes | - | ---------------------- | --------------- | --------- | ----------------------------- | - | `MinMaxScaler` | Available | Available | | - | `StandardScaler` | Available | Available | | - | `QuantileTransformer` | Available | Available | | - | `FunctionTransformer` | Available | Available | Only functions in math.h | - | `ColumnTransformer` | Available | Available | Only integer column indices | - | `Pipeline` | Available | Partial | Pipelines of pipelines break | + | Model | Implementation | Test | Notes | + | ---------------------- | --------------- | --------- |-----------------------------------| + | `MinMaxScaler` | Available | Available | | + | `StandardScaler` | Available | Available | | + | `QuantileTransformer` | Available | Available | | + | `FunctionTransformer` | Available | Available | Supports user-defined C functions | + | `ColumnTransformer` | Available | Available | Only integer column indices | + | `Pipeline` | Available | Partial | Pipelines of pipelines break | #### Scikit-Learn models | Model | Implementation | Test | Notes | diff --git a/scikinC/GBDTTraversalConverter.py b/scikinC/GBDTTraversalConverter.py index ad263d8..3850949 100644 --- a/scikinC/GBDTTraversalConverter.py +++ b/scikinC/GBDTTraversalConverter.py @@ -2,7 +2,7 @@ from scikinC import BaseConverter import numpy as np -from scikinC._tools import array2c, retrieve_prior +from scikinC._tools import array2c, retrieve_prior, sklearn_min_version class GBDTTraversalConverter (BaseConverter): """ @@ -41,9 +41,11 @@ def convert(self, bdt, name=None): min_, max_=self._get_limits(bdt) - nX = bdt.n_features_in_ + nX = bdt.n_features_in_ - retvar="FLOAT_T ret[%d]" % n_classes + n_output = max(2, n_classes) if sklearn_min_version("1.0") else n_classes + + retvar="FLOAT_T ret[%d]" % n_output invar="FLOAT_T inp[%d]" % nX lines += [ "#include ", @@ -100,20 +102,22 @@ def convert(self, bdt, name=None): for iClass in range (n_classes): - lines.append ( " " - "accumulator[%(iClass)d] += %(learningrate).10f * __%(name)s_traversal ( inp, " - " v%(iTree)03d_%(iClass)02d, t%(iTree)03d_%(iClass)02d, f%(iTree)03d_%(iClass)02d, l%(iTree)03d_%(iClass)02d, r%(iTree)03d_%(iClass)02d ); " - % dict( - learningrate = bdt.learning_rate, - maxlen = len (tree[iClass].tree_.feature), - iClass = iClass, - iTree = iTree, - name = name or "bdt", - value = array2c ([v[0][0] for v in tree[iClass].tree_.value]), - threshold = array2c (threshold, "%.20f"), - feature = array2c (feature, "%.0f"), - left = array2c ([l for l in tree[iClass].tree_.children_left], "%.0f"), - right = array2c ([r for r in tree[iClass].tree_.children_right], "%.0f"), + class_id = 1 if n_classes == 1 else iClass + lines.append ( " " + "accumulator[%(class_id)d] += %(learningrate).10f * __%(name)s_traversal ( inp, " + " v%(iTree)03d_%(iClass)02d, t%(iTree)03d_%(iClass)02d, f%(iTree)03d_%(iClass)02d, l%(iTree)03d_%(iClass)02d, r%(iTree)03d_%(iClass)02d ); " + % dict( + class_id=class_id, + learningrate=bdt.learning_rate, + maxlen=len (tree[iClass].tree_.feature), + iClass=iClass, + iTree=iTree, + name=name or "bdt", + value=array2c ([v[0][0] for v in tree[iClass].tree_.value]), + threshold=array2c (threshold, "%.20f"), + feature=array2c (feature, "%.0f"), + left=array2c ([l for l in tree[iClass].tree_.children_left], "%.0f"), + right=array2c ([r for r in tree[iClass].tree_.children_right], "%.0f"), )) lines.append (" }") @@ -130,20 +134,19 @@ def convert(self, bdt, name=None): lines.append(" update_%s_tree%03d (acc, inp); " % (name or bdt, iTree)) - - if n_classes > 1: + if n_output > 1: lines += [ " short argmax = 0; ", - " for (i = 0; i < %d; ++i) if (acc[i] > acc[argmax]) argmax = i; " % n_classes, + " for (i = 0; i < %d; ++i) if (acc[i] > acc[argmax]) argmax = i; " % n_output, " if (acc[argmax] > 1e10) { ", - " for (i = 0; i < %d; ++i) ret[i] = (i==argmax ? 1.: 0.); " % n_classes, + " for (i = 0; i < %d; ++i) ret[i] = (i==argmax ? 1.: 0.); " % n_output, " return ret; ", " }", - " for (i=0; i < %d; ++i) acc[i] = exp(acc[i]);" % n_classes, - " for (i=0; i < %d; ++i) acc[i] = (acc[i] > 1e300?1e300:acc[i]);" % n_classes, + " for (i=0; i < %d; ++i) acc[i] = exp(acc[i]);" % n_output, + " for (i=0; i < %d; ++i) acc[i] = (acc[i] > 1e300?1e300:acc[i]);" % n_output, " long double sum = 0;", - " for (i=0; i < %d; ++i) sum += acc[i];" % n_classes, - " for (i=0; i < %d; ++i) acc[i] /= sum;" % n_classes, + " for (i=0; i < %d; ++i) sum += acc[i];" % n_output, + " for (i=0; i < %d; ++i) acc[i] /= sum;" % n_output, ] else: lines += [ @@ -153,7 +156,7 @@ def convert(self, bdt, name=None): lines += [ - " for (i = 0; i < %d; ++i) ret[i] = acc[i];" % n_classes, + " for (i = 0; i < %d; ++i) ret[i] = acc[i];" % n_output, " return ret;", "}" ] diff --git a/scikinC/GBDTUnrollingConverter.py b/scikinC/GBDTUnrollingConverter.py index 147c121..b755075 100644 --- a/scikinC/GBDTUnrollingConverter.py +++ b/scikinC/GBDTUnrollingConverter.py @@ -2,106 +2,105 @@ from scikinC import BaseConverter import numpy as np -from scikinC._tools import array2c, retrieve_prior - - -class GBDTUnrollingConverter (BaseConverter): - """ - Converts GradientBoostingClassifiers with explicit conversion of - each tree in C language. Resulting C takes longer to compile, but it is - slightly faster in inference, and does not require pointer algebra. - """ - - def _singletree(self, tree, node): - "Single-tree traversal" - if tree.feature[node] >= 0: - return "(inp[%d] <= %.20f ? %s : %s)" % (tree.feature[node], - tree.threshold[node], - self._singletree(tree, tree.children_left[node]), - self._singletree(tree, tree.children_right[node])) - else: - return str(tree.value[node][0][0]) - - - @ staticmethod - def _get_limits(bdt): - mins=[None] * bdt.n_features_in_ - maxs=[None] * bdt.n_features_in_ - - for treeset in bdt.estimators_: - for tree in treeset: - for feature in range(bdt.n_features_in_): - features=tree.tree_.feature - if feature not in features: continue - min_=np.min(tree.tree_.threshold[features == feature]) - if mins[feature] is None or min_ < mins[feature]: - mins[feature]=min_ - - max_=np.max(tree.tree_.threshold[features == feature]) - if maxs[feature] is None or max_ > maxs[feature]: - maxs[feature]=max_ - - return mins, maxs - - - - - def convert(self, bdt, name=None): - n_classes=bdt.n_classes_ if bdt.n_classes_ > 2 else 1 - lines=self.header() - - if n_classes > 1: - for iClass in range(n_classes): - lines.append("/* ret [ %d ] is the probability for category: %-15s */" % - (iClass, str(bdt.classes_[iClass]))) - - min_, max_=self._get_limits(bdt) - - nX = bdt.n_features_in_ - - retvar="FLOAT_T ret[%d]" % n_classes - invar="FLOAT_T inp[%d]" % nX - lines += [ - "#include ", - "extern \"C\"", - "FLOAT_T *%s (%s, const %s)" % (name or "bdt", retvar, invar), - "{", - " const FLOAT_T init[] = %s;" % array2c(retrieve_prior(bdt)), - " int i; ", - " for (i=0; i < %d; ++i) ret[i] = init[i];" % n_classes, - ] - - for iTree, tree in enumerate(bdt.estimators_): - lines += [" /** TREE %03d **/" % iTree] - for iClass in range(n_classes): +from scikinC._tools import array2c, retrieve_prior, sklearn_min_version + + + +class GBDTUnrollingConverter(BaseConverter): + """ + Converts GradientBoostingClassifiers with explicit conversion of + each tree in C language. Resulting C takes longer to compile, but it is + slightly faster in inference, and does not require pointer algebra. + """ + + def _singletree(self, tree, node): + "Single-tree traversal" + if tree.feature[node] >= 0: + return "(inp[%d] <= %.20f ? %s : %s)" % (tree.feature[node], + tree.threshold[node], + self._singletree(tree, tree.children_left[node]), + self._singletree(tree, tree.children_right[node])) + else: + return str(tree.value[node][0][0]) + + @staticmethod + def _get_limits(bdt): + mins = [None] * bdt.n_features_in_ + maxs = [None] * bdt.n_features_in_ + + for treeset in bdt.estimators_: + for tree in treeset: + for feature in range(bdt.n_features_in_): + features = tree.tree_.feature + if feature not in features: continue + min_ = np.min(tree.tree_.threshold[features == feature]) + if mins[feature] is None or min_ < mins[feature]: + mins[feature] = min_ + + max_ = np.max(tree.tree_.threshold[features == feature]) + if maxs[feature] is None or max_ > maxs[feature]: + maxs[feature] = max_ + + return mins, maxs + + def convert(self, bdt, name=None): + n_classes = bdt.n_classes_ if bdt.n_classes_ > 2 else 1 + lines = self.header() + + if n_classes > 1: + for iClass in range(n_classes): + lines.append( + "/* ret [ %d ] is the probability for category: %-15s */" % + (iClass, str(bdt.classes_[iClass])) + ) + + min_, max_ = self._get_limits(bdt) + + nX = bdt.n_features_in_ + n_output = max(2, n_classes) if sklearn_min_version("1.0") else n_classes + + retvar = "FLOAT_T ret[%d]" % n_output + invar = "FLOAT_T inp[%d]" % nX lines += [ - " ret[%d] += %f * (%s); " % (iClass, bdt.learning_rate, - self._singletree(tree[iClass].tree_, 0)) - ] - - - if n_classes > 1: - lines += [ - " short argmax = 0; ", - " for (int i = 0; i < %d; ++i) if (ret[i] > ret[argmax]) argmax = i; " % n_classes, - " if (ret[argmax] > 1e10) { ", - " for (int i = 0; i < %d; ++i) ret[i] = (i==argmax ? 1.: 0.); " % n_classes, - " return ret; ", - " }", - " for (short i=0; i < %d; ++i) ret[i] = exp(ret[i]);" % n_classes, - " for (short i=0; i < %d; ++i) ret[i] = (ret[i] > 1e300?1e300:ret[i]);" % n_classes, - " long double sum = 0;", - " for (short i=0; i < %d; ++i) sum += ret[i];" % n_classes, - " for (short i=0; i < %d; ++i) ret[i] /= sum;" % n_classes, + "#include ", + "extern \"C\"", + "FLOAT_T *%s (%s, const %s)" % (name or "bdt", retvar, invar), + "{", + " const FLOAT_T init[] = %s;" % array2c(retrieve_prior(bdt)), + " int i; ", + " for (i=0; i < %d; ++i) ret[i] = init[i];" % n_output, ] - else: - lines += [ - " if (ret[0] > 1e10) ret[0] = 1.;", - " else ret[0] = 1. / (1 + exp(-ret[0]));" - ] - - - lines += [" return ret;", "}"] - - return "\n".join(lines) + for iTree, tree in enumerate(bdt.estimators_): + lines += [" /** TREE %03d **/" % iTree] + for iClass in range(n_classes): + class_id = 1 if n_classes == 1 else iClass + lines += [ + " ret[%d] += %f * (%s); " % (class_id, bdt.learning_rate, + self._singletree(tree[iClass].tree_, 0)) + ] + + + if n_output > 1: + lines += [ + " short argmax = 0; ", + " for (int i = 0; i < %d; ++i) if (ret[i] > ret[argmax]) argmax = i; " % n_output, + " if (ret[argmax] > 1e10) { ", + " for (int i = 0; i < %d; ++i) ret[i] = (i==argmax ? 1.: 0.); " % n_output, + " return ret; ", + " }", + " for (short i=0; i < %d; ++i) ret[i] = exp(ret[i]);" % n_output, + " for (short i=0; i < %d; ++i) ret[i] = (ret[i] > 1e300?1e300:ret[i]);" % n_output, + " long double sum = 0;", + " for (short i=0; i < %d; ++i) sum += ret[i];" % n_output, + " for (short i=0; i < %d; ++i) ret[i] /= sum;" % n_output, + ] + else: + lines += [ + " if (ret[0] > 1e10) ret[0] = 1.;", + " else ret[0] = 1. / (1 + exp(-ret[0]));" + ] + + lines += [" return ret;", "}"] + + return "\n".join(lines) diff --git a/scikinC/ModelLoader.py b/scikinC/ModelLoader.py index 01f37c5..1b424ba 100644 --- a/scikinC/ModelLoader.py +++ b/scikinC/ModelLoader.py @@ -33,6 +33,10 @@ def load_from_string ( string ): if os.path.isfile (string): + if string.endswith(".keras"): + from tensorflow.keras.models import load_model + return ({name or _basename(string): load_model (string, compile=False)},) + try: with open ( string, 'rb' ) as f: ## it is a pickled object diff --git a/scikinC/_tools.py b/scikinC/_tools.py index 901a09a..5ab1692 100644 --- a/scikinC/_tools.py +++ b/scikinC/_tools.py @@ -1,6 +1,9 @@ import numpy as np from sklearn.dummy import DummyClassifier from scikinC import get_converters, InvertibleConverter +import sys +import sklearn +from packaging import version ################################################################################ def array2c (array, fmt = None): @@ -49,9 +52,18 @@ def retrieve_prior (bdt): "Retrieve the prior for BDT classifiers" if bdt.init_ == 'zero': return np.zeros(bdt.n_classes_) - elif isinstance (bdt.init_, DummyClassifier): + elif isinstance (bdt.init_, DummyClassifier) and hasattr(bdt, 'loss_'): X = np.empty([1, bdt.n_classes_]) return np.asarray(bdt.loss_.get_init_raw_predictions(X, bdt.init_)).ravel() + elif isinstance (bdt.init_, DummyClassifier) and hasattr(bdt.init_, 'predict_proba'): + X = np.zeros([1, bdt.n_features_in_], dtype=np.float32) + + def inverse_softmax(probs): + probs = np.clip(probs, 1e-15, 1 - 1e-15) + return np.log(probs) + + ret = inverse_softmax(bdt.init_.predict_proba(X)).ravel() + return ret raise NotImplementedError ( "Cannot convert initializer %s" % str(bdt.init_) ) @@ -96,3 +108,7 @@ def is_invertible (model): module = __import__ ( "scikinC.%s" % converter, fromlist = [converter]) ret = getattr(module, converter).INVERTIBLE return ret + + +def sklearn_min_version(req_version): + return version.parse(sklearn.__version__) >= version.parse(req_version) \ No newline at end of file diff --git a/scikinC/layers/LeakyReLU.py b/scikinC/layers/LeakyReLU.py index 03e1b4b..5c6658b 100644 --- a/scikinC/layers/LeakyReLU.py +++ b/scikinC/layers/LeakyReLU.py @@ -27,7 +27,7 @@ def definition(self): """ % dict( layername = self.name, nX = nX, - alpha = self.layer.alpha + alpha = self.layer.alpha if hasattr(self.layer, 'alpha') else self.layer.negative_slope )] return "\n".join(ret) diff --git a/test/testing_boilerplate.py b/test/fixture_registry.py similarity index 70% rename from test/testing_boilerplate.py rename to test/fixture_registry.py index c7e0261..da1c8b5 100644 --- a/test/testing_boilerplate.py +++ b/test/fixture_registry.py @@ -9,6 +9,7 @@ import sys import pytest + class FixtureRegistry: """ A registry to track pytest fixtures per module and generate @@ -18,28 +19,31 @@ class FixtureRegistry: def __init__(self): self._fixtures_per_module = dict() - def register(self, test_category=None): + def register(self, *test_categories): """ Decorator to register a pytest fixture and store its name under the calling module. Args: - f (function): The fixture function + *test_categories: strings identifying sets of fixtures to be tested separately. + Tests always belong to the default group named after the module itself. + Note that name sets are visible at global scope, it is client's responsibility to ensure uniqueness + of the set names (if intended). Returns: function: The same function wrapped as a pytest fixture """ - if test_category is None: - test_category = sys._getframe(1).f_globals["__name__"] + test_categories = [sys._getframe(1).f_globals["__name__"], *test_categories] def decorator(f): fixture_name = f.__name__ - # Initialize fixture set for the module if needed - self._fixtures_per_module.setdefault(test_category, set()) + for test_category in test_categories: + # Initialize fixture set for the module if needed + self._fixtures_per_module.setdefault(test_category, set()) - # Register the fixture name - self._fixtures_per_module[test_category].add(fixture_name) + # Register the fixture name + self._fixtures_per_module[test_category].add(fixture_name) # Return the input function as a fixture return pytest.fixture(f) diff --git a/test/test_ColumnTransformerConverter.py b/test/test_ColumnTransformerConverter.py index 75bbd21..3d67225 100644 --- a/test/test_ColumnTransformerConverter.py +++ b/test/test_ColumnTransformerConverter.py @@ -6,11 +6,12 @@ import pytest # Local testing infrastructure -from wrap import deploy_pickle +from wrap import deploy_pickle +from fixture_registry import fixtures ################################################################################ ## Test preparation -@pytest.fixture +@fixtures.register('invertible') def passthrough_transformer(): transformer_ = ColumnTransformer([], remainder='passthrough') X = np.random.uniform (20,30,(1000, 10)) @@ -18,7 +19,7 @@ def passthrough_transformer(): return transformer_ -@pytest.fixture +@fixtures.register() def double_passthrough_transformer(): transformer_ = ColumnTransformer([ ('keep1', 'passthrough', [0,2]), @@ -29,7 +30,7 @@ def double_passthrough_transformer(): return transformer_ -@pytest.fixture +@fixtures.register('invertible') def ss_and_passthrough_transformer(): transformer_ = ColumnTransformer([ ('ss', StandardScaler(), [1,2,3]), @@ -39,7 +40,7 @@ def ss_and_passthrough_transformer(): return transformer_ -@pytest.fixture +@fixtures.register('invertible') def qt_and_passthrough_transformer(): transformer_ = ColumnTransformer([ ('qt', QuantileTransformer(output_distribution='normal'), [0,2]), @@ -49,7 +50,7 @@ def qt_and_passthrough_transformer(): return transformer_ -@pytest.fixture +@fixtures.register('invertible') def double_qt_and_passthrough_transformer(): transformer_ = ColumnTransformer([ ('qt1', QuantileTransformer(n_quantiles=100, output_distribution='normal'), [3,4]), @@ -60,7 +61,7 @@ def double_qt_and_passthrough_transformer(): return transformer_ -@pytest.fixture +@fixtures.register('invertible') def qt_and_ss_and_passthrough_transformer(): transformer_ = ColumnTransformer([ ('qt', QuantileTransformer(output_distribution='normal'), [0,1]), @@ -71,7 +72,7 @@ def qt_and_ss_and_passthrough_transformer(): return transformer_ -@pytest.fixture +@fixtures.register('invertible') def qt_and_ft_transformer_only(): transformer_ = ColumnTransformer([ ('qt', QuantileTransformer(output_distribution='normal'), [0,1,2,3,4]), @@ -82,7 +83,7 @@ def qt_and_ft_transformer_only(): return transformer_ -@pytest.fixture +@fixtures.register('invertible') def double_qt_transformer_only(): transformer_ = ColumnTransformer([ ('qt1', QuantileTransformer(n_quantiles=100, output_distribution='normal'), [5,6,7,8,9]), @@ -93,7 +94,7 @@ def double_qt_transformer_only(): return transformer_ -@pytest.fixture +@fixtures.register() def qt_and_ft_transformer_dropping(): transformer_ = ColumnTransformer([ ('qt', QuantileTransformer(output_distribution='normal'), [0,2]), @@ -104,34 +105,10 @@ def qt_and_ft_transformer_dropping(): return transformer_ -transformers = [ - 'passthrough_transformer', - 'double_passthrough_transformer', - 'ss_and_passthrough_transformer', - 'qt_and_passthrough_transformer', - 'double_qt_and_passthrough_transformer', - 'qt_and_ss_and_passthrough_transformer', - 'qt_and_ft_transformer_only', - 'double_qt_transformer_only', - 'qt_and_ft_transformer_dropping', - ] - -invertible_transformers = [ - 'passthrough_transformer', - 'ss_and_passthrough_transformer', - 'qt_and_passthrough_transformer', - 'double_qt_and_passthrough_transformer', - 'qt_and_ss_and_passthrough_transformer', - 'qt_and_ft_transformer_only', - 'double_qt_transformer_only' - ] - - ################################################################################ ## Real tests -@pytest.mark.parametrize ('scaler', transformers) -def test_forward (scaler, request): - scaler = request.getfixturevalue(scaler) +@fixtures.test() +def test_forward (scaler): deployed = deploy_pickle("functiontransformer", scaler) xtest = np.random.uniform (21,29, 10) py = scaler.transform (xtest[None]) @@ -141,9 +118,8 @@ def test_forward (scaler, request): assert np.abs(py-c).max() < 1e-4 -@pytest.mark.parametrize ('scaler', invertible_transformers) -def test_inverse (scaler, request): - scaler = request.getfixturevalue(scaler) +@fixtures.test('invertible') +def test_inverse (scaler): deployed = deploy_pickle("function_transformer", scaler) xtest = np.random.uniform (0,1, 10) py = np.empty (10) diff --git a/test/test_FastQuantileLayer.py b/test/test_FastQuantileLayer.py deleted file mode 100644 index 3a73ad2..0000000 --- a/test/test_FastQuantileLayer.py +++ /dev/null @@ -1,60 +0,0 @@ -import numpy as np -from FastQuantileLayer import FastQuantileLayer - -# PyTest testing infrastructure -import pytest - -# Local testing infrastructure -from wrap import deploy_pickle - -################################################################################ -## Test preparation - -@pytest.fixture -def scaler_uniform(): - scaler_ = FastQuantileLayer() - X = np.random.uniform (20,30,(1000, 10)) - scaler_.fit (X) - return scaler_ - -@pytest.fixture -def scaler_bool(): - scaler_ = FastQuantileLayer(output_distribution='normal') - X = np.random.choice ([0., 1.],(1000, 10), [0.8, 0.2]) - scaler_.fit (X) - return scaler_ - -@pytest.fixture -def scaler_normal(): - scaler_ = FastQuantileLayer(output_distribution='normal') - X = np.random.uniform (20,30,(1000, 10)) - scaler_.fit (X) - return scaler_ - - -scalers = ['scaler_uniform', 'scaler_bool', 'scaler_normal'] - -################################################################################ -## Real tests -@pytest.mark.parametrize ('scaler', scalers) -def test_forward (scaler, request): - scaler = request.getfixturevalue(scaler) - deployed = deploy_pickle("fastQL", scaler) - xtest = np.random.uniform (20,30, 10) - py = scaler.transform (xtest[None]).numpy() - c = deployed.transform (10, xtest) - assert np.abs(py-c).max() < 1e-5 - - -@pytest.mark.parametrize ('scaler', scalers) -def test_inverse (scaler, request): - scaler = request.getfixturevalue(scaler) - deployed = deploy_pickle("fastQL", scaler) - xtest = np.random.uniform (0,1, 10) - py = scaler.transform (xtest[None], inverse=True).numpy() - c = deployed.transform_inverse (10, xtest) - assert np.abs(py-c).max() < 1e-5 - - - - diff --git a/test/test_FunctionTransformer.py b/test/test_FunctionTransformer.py new file mode 100644 index 0000000..4788922 --- /dev/null +++ b/test/test_FunctionTransformer.py @@ -0,0 +1,63 @@ +import numpy as np +from sklearn.preprocessing import FunctionTransformer + +from scikinC.decorators import inline_c + +# Local testing infrastructure +from wrap import deploy_pickle +from fixture_registry import fixtures + + +################################################################################ +## Test preparation +@fixtures.register() +def empty_transformer(): + transformer_ = FunctionTransformer(validate=True) + X = np.random.uniform(20, 30, (1000, 10)) + transformer_.fit(X) + return transformer_ + + +@fixtures.register() +def log_transformer(): + transformer_ = FunctionTransformer(np.log, np.exp, validate=True) + X = np.random.uniform(20, 30, (1000, 10)) + transformer_.fit(X) + return transformer_ + + +@fixtures.register() +def custom_transformer(): + transformer_ = FunctionTransformer(np.square, np.sqrt, validate=True) + transformer_.func_inC = 'pow({x}, 2)' + X = np.random.uniform(20, 30, (1000, 10)) + transformer_.fit(X) + return transformer_ + + +@fixtures.register() +def empty_transformer_wo_fit(): + transformer_ = FunctionTransformer() + transformer_.n_features_in_ = 10 + return transformer_ + + +################################################################################ +## Real tests +@fixtures.test() +def test_forward(ft): + deployed = deploy_pickle("functiontransformer", ft) + xtest = np.random.uniform(21, 29, 10) + py = ft.transform(xtest[None]) + c = deployed.transform(10, xtest) + print(xtest, "->", c, " instead of: ", py) + assert np.abs(py - c).max() < 1e-4 + + +@fixtures.test() +def test_inverse(ft): + deployed = deploy_pickle("functiontransformer", ft) + xtest = np.random.uniform(0, 1, 10) + py = ft.inverse_transform(xtest[None]) + c = deployed.transform_inverse(10, xtest) + assert np.abs(py - c).max() < 1e-4 diff --git a/test/test_FunctionTransformerConverter.py b/test/test_FunctionTransformerConverter.py deleted file mode 100644 index 5a83976..0000000 --- a/test/test_FunctionTransformerConverter.py +++ /dev/null @@ -1,75 +0,0 @@ -import numpy as np -from sklearn.preprocessing import FunctionTransformer - -# PyTest testing infrastructure -import pytest - -# Local testing infrastructure -from wrap import deploy_pickle - -################################################################################ -## Test preparation -@pytest.fixture -def empty_transformer(): - transformer_ = FunctionTransformer(validate=True) - X = np.random.uniform (20,30,(1000, 10)) - transformer_.fit (X) - return transformer_ - -@pytest.fixture -def log_transformer(): - transformer_ = FunctionTransformer(np.log, np.exp, validate=True) - X = np.random.uniform (20,30,(1000, 10)) - transformer_.fit (X) - return transformer_ - -@pytest.fixture -def custom_transformer(): - transformer_ = FunctionTransformer(np.square, np.sqrt, validate=True) - transformer_.func_inC = 'pow({x}, 2)' - X = np.random.uniform (20,30,(1000, 10)) - transformer_.fit (X) - return transformer_ - - -@pytest.fixture -def empty_transformer_wo_fit(): - transformer_ = FunctionTransformer() - transformer_.n_features_in_ = 10 - return transformer_ - - - -scalers = [ - 'empty_transformer', - 'log_transformer', - 'custom_transformer', - 'empty_transformer_wo_fit', - ] - - -################################################################################ -## Real tests -@pytest.mark.parametrize ('scaler', scalers) -def test_forward (scaler, request): - scaler = request.getfixturevalue(scaler) - deployed = deploy_pickle("functiontransformer", scaler) - xtest = np.random.uniform (21,29, 10) - py = scaler.transform (xtest[None]) - c = deployed.transform (10, xtest) - print (xtest, "->", c, " instead of: ", py) - assert np.abs(py-c).max() < 1e-4 - - -@pytest.mark.parametrize ('scaler', scalers) -def test_inverse (scaler, request): - scaler = request.getfixturevalue(scaler) - deployed = deploy_pickle("function_transformer", scaler) - xtest = np.random.uniform (0,1, 10) - py = scaler.inverse_transform (xtest[None]) - c = deployed.transform_inverse (10, xtest) - assert np.abs(py-c).max() < 1e-4 - - - - diff --git a/test/test_GBDTC.py b/test/test_GBDTC.py index 904740d..87d4a31 100644 --- a/test/test_GBDTC.py +++ b/test/test_GBDTC.py @@ -1,85 +1,118 @@ -import numpy as np -from sklearn.ensemble import GradientBoostingClassifier +import numpy as np +from sklearn.ensemble import GradientBoostingClassifier # PyTest testing infrastructure import pytest # Local testing infrastructure -from wrap import deploy_pickle +from wrap import deploy_pickle +from fixture_registry import fixtures + ################################################################################ ## Test preparation -@pytest.fixture -def classifier(): - classifier_ = GradientBoostingClassifier() - X = np.concatenate (( - np.random.normal (0,2,(1000, 10)), - np.random.normal (1,3,( 100, 10)), - np.random.normal (2,4,( 10, 10)), - )) - y = np.array ( - [0] * 1000 + [1] * 100 + [2] * 10 ) - classifier_.fit (X, y) - return classifier_ - - -@pytest.fixture -def deployed(classifier): - return deploy_pickle("gbdtc", classifier) - - -@pytest.fixture +@fixtures.register() +def binary_classifier(): + classifier_ = GradientBoostingClassifier(n_estimators=10, init='zero') + X = np.concatenate( + ( + np.random.normal(0, 2, (1000, 10)), + np.random.normal(1, 3, (1000, 10)), + ) + ) + y = np.array( + [0] * 1000 + [1] * 1000 + ) + classifier_.fit(X, y) + return classifier_ + +@fixtures.register() +def deep_binary_classifier(): + classifier_ = GradientBoostingClassifier(n_estimators=10, init='zero', max_depth=8) + X = np.concatenate( + ( + np.random.normal(0, 2, (1000, 10)), + np.random.normal(1, 3, (1000, 10)), + ) + ) + y = np.array( + [0] * 1000 + [1] * 1000 + ) + classifier_.fit(X, y) + return classifier_ + +@fixtures.register() +def multiclass_classifier(): + classifier_ = GradientBoostingClassifier(n_estimators=10) + X = np.concatenate( + ( + np.random.normal(0, 2, (1000, 10)), + np.random.normal(1, 3, (100, 10)), + np.random.normal(2, 4, (10, 10)), + ) + ) + y = np.array( + [0] * 1000 + [1] * 100 + [2] * 10 + ) + classifier_.fit(X, y) + return classifier_ + +@fixtures.register() +def zero_init(): + classifier_ = GradientBoostingClassifier(n_estimators=10, init='zero') + X = np.concatenate( + ( + np.random.normal(0, 2, (1000, 10)), + np.random.normal(1, 3, (100, 10)), + np.random.normal(2, 4, (10, 10)), + ) + ) + y = np.array( + [0] * 1000 + [1] * 100 + [2] * 10 + ) + classifier_.fit(X, y) + return classifier_ + + +@fixtures.register() def deep_classifier(): - classifier_ = GradientBoostingClassifier(max_depth=8) - X = np.concatenate (( - np.random.normal (0,2,(1000, 10)), - np.random.normal (1,3,( 100, 10)), - np.random.normal (2,4,( 10, 10)), - )) - y = np.array ( - [0] * 1000 + [1] * 100 + [2] * 10 ) - classifier_.fit (X, y) - return classifier_ - + classifier_ = GradientBoostingClassifier(max_depth=8) + X = np.concatenate( + ( + np.random.normal(0, 2, (1000, 10)), + np.random.normal(1, 3, (100, 10)), + np.random.normal(2, 4, (10, 10)), + ) + ) + y = np.array( + [0] * 1000 + [1] * 100 + [2] * 10 + ) + classifier_.fit(X, y) + return classifier_ -@pytest.fixture -def deep_deployed(deep_classifier): - return deploy_pickle("gbdtcD", deep_classifier) ################################################################################ ## Real tests -def test_normalization (classifier, deployed): - xtest = np.random.uniform (0,1, 10) - py = classifier.predict_proba (xtest[None])[0] - c = deployed.transform (len(py), xtest) - - assert np.abs(np.sum(c)-1).max() < 1e-5 - -def test_predict (classifier, deployed): - xtest = np.random.uniform (0,1, 10) - py = classifier.predict_proba (xtest[None])[0] - c = deployed.transform (len(py), xtest) - - print (np.c_[py, c]) - assert np.abs(py-c).max() < 1e-5 - +@fixtures.test() +def test_normalization(classifier): + deployed = deploy_pickle("gbdtcD", classifier) + xtest = np.random.uniform(0, 1, 10) + py = classifier.predict_proba(xtest[None])[0] + c = deployed.transform(len(py), xtest) -def test_deep_normalization (deep_classifier, deep_deployed): - xtest = np.random.uniform (0,1, 10) - py = deep_classifier.predict_proba (xtest[None])[0] - c = deep_deployed.transform (len(py), xtest) + print (py, c) - assert np.abs(np.sum(c)-1).max() < 1e-5 + assert np.abs(np.sum(c) - 1).max() < 1e-5 -def test_deep_predict (deep_classifier, deep_deployed): - xtest = np.random.uniform (0,1, 10) - py = deep_classifier.predict_proba (xtest[None])[0] - c = deep_deployed.transform (len(py), xtest) - print (np.c_[py, c]) - assert np.abs(py-c).max() < 1e-5 - +@fixtures.test() +def test_predict(classifier): + deployed = deploy_pickle("gbdtcD", classifier) + xtest = np.random.uniform(0, 1, 10) + py = classifier.predict_proba(xtest[None])[0] + c = deployed.transform(len(py), xtest) - + print(np.c_[py, c]) + assert np.abs(py - c).max() < 1e-5 diff --git a/test/test_Pipeline.py b/test/test_Pipeline.py index abe068e..89716e9 100644 --- a/test/test_Pipeline.py +++ b/test/test_Pipeline.py @@ -1,50 +1,79 @@ -import numpy as np +import numpy as np +from rich.table import Column from sklearn.preprocessing import MinMaxScaler, StandardScaler -from sklearn.pipeline import Pipeline +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import FunctionTransformer +from sklearn.compose import ColumnTransformer -# PyTest testing infrastructure import pytest # Local testing infrastructure -from wrap import deploy_pickle +from wrap import deploy_pickle +from fixture_registry import fixtures + ################################################################################ ## Test preparation -@pytest.fixture +@fixtures.register() def pipeline(): - X = np.concatenate (( - np.random.uniform (0,2,(1000, 10)), - np.random.uniform (1,3,(1000, 10)), - np.random.uniform (2,4,(1000, 10)), - )) + X = np.concatenate( + ( + np.random.uniform(0, 2, (1000, 10)), + np.random.uniform(1, 3, (1000, 10)), + np.random.uniform(2, 4, (1000, 10)), + ) + ) - step1 = MinMaxScaler () - X2 = step1.fit_transform (X) - step2 = StandardScaler() - step2.fit(X2) + step1 = MinMaxScaler() + X2 = step1.fit_transform(X) + step2 = StandardScaler() + step2.fit(X2) - return Pipeline(steps = ((("minmax", step1), ("standard", step2)))) + return Pipeline(steps=((("minmax", step1), ("standard", step2)))) -@pytest.fixture -def deployed(pipeline): - return deploy_pickle("pipeline", pipeline) +@fixtures.register() +def composition(): + X = np.concatenate( + ( + np.random.uniform(0, 2, (1000, 10)), + np.random.uniform(1, 3, (1000, 10)), + np.random.uniform(2, 4, (1000, 10)), + ) + ) + return Pipeline([ + ("cols", ColumnTransformer([ + ('log', FunctionTransformer(np.log), [0]), + ('passthrough', 'passthrough', [1, 2, 3, 4, 5, 6, 7, 8, 9]), + ])), + ("standard", StandardScaler()) + ]).fit(X) ################################################################################ ## Real tests -def test_pipeline (pipeline, deployed): - xtest = np.random.uniform (0,1, 10) - py = pipeline.transform (xtest[None])[0] - c = deployed.transform (len(py), xtest) - c_back = pipeline.inverse_transform (py[None]) - py_back = deployed.transform_inverse (len(py), py) +@fixtures.test() +def test_pipeline(pipeline): + deployed = deploy_pickle("pipeline", pipeline) + xtest = np.random.uniform(0, 1, 10) + py = pipeline.transform(xtest[None])[0] + c = deployed.transform(len(py), xtest) + + assert np.abs(py - c).max() < 1e-5 - assert np.abs(py-c).max() < 1e-5 - assert np.abs(py_back - c_back).max() < 1e-5 - +################################################################################ +## Real tests +@fixtures.test() +def test_inverted_pipeline(pipeline): + if not hasattr(pipeline, 'inverse_transform'): + return pytest.skip("Will not test inversion of not-invertible pipeline") - + deployed = deploy_pickle("pipeline", pipeline) + xtest = np.random.uniform(0, 1, 10) + py = pipeline.transform(xtest[None])[0] + c_back = pipeline.inverse_transform(py[None]) + py_back = deployed.transform_inverse(len(py), py) + assert np.abs(py_back - c_back).max() < 1e-5 diff --git a/test/test_PolynomialFeatures.py b/test/test_PolynomialFeatures.py index e9f3900..1f000fd 100644 --- a/test/test_PolynomialFeatures.py +++ b/test/test_PolynomialFeatures.py @@ -6,7 +6,7 @@ # Local testing infrastructure from wrap import deploy_pickle -from testing_boilerplate import fixtures +from fixture_registry import fixtures ################################################################################ diff --git a/test/test_QuantileTransformer.py b/test/test_QuantileTransformer.py index 77f9ea2..dfd3195 100644 --- a/test/test_QuantileTransformer.py +++ b/test/test_QuantileTransformer.py @@ -9,55 +9,49 @@ # Local testing infrastructure from wrap import deploy_pickle -from testing_boilerplate import fixtures +from fixture_registry import fixtures ################################################################################ ## Test preparation -@fixtures.register +@fixtures.register() def scaler_uniform(): scaler_ = QuantileTransformer() X = np.random.uniform (20,30,(1000, 20)) scaler_.fit (X) return scaler_ -@fixtures.register +@fixtures.register() def scaler_normal(): scaler_ = QuantileTransformer(output_distribution='normal', n_quantiles=100) X = np.random.uniform (20,30,(1000, 20)) scaler_.fit (X) return scaler_ -@fixtures.register +@fixtures.register() def scaler_bool_uniform(): scaler_ = QuantileTransformer(output_distribution='uniform') X = np.random.choice ([22.,27.], (1000, 20), (0.8, 0.2)) scaler_.fit (X) return scaler_ -@fixtures.register +@fixtures.register() def scaler_bool_normal(): scaler_ = QuantileTransformer(output_distribution='normal') X = np.random.choice ([22.,27.], (1000, 20), (0.8, 0.2)) scaler_.fit (X) return scaler_ -@fixtures.register +@fixtures.register() def scaler_delta_normal(): scaler_ = QuantileTransformer(output_distribution='normal') X = np.full((10000,20), np.pi) scaler_.fit (X) return scaler_ -def read_file(filename): - dir = os.path.dirname(__file__) - with open(os.path.join(dir, "pathologies", filename), 'rb') as f: - return pickle.load(f) - - ################################################################################ ## Real tests -@fixtures.test +@fixtures.test() def test_forward (scaler): n_features = scaler.n_features_in_ if hasattr(scaler, 'n_features_in_') else 20 @@ -74,7 +68,7 @@ def test_forward (scaler): array = np.array(results) print (array.T) -@fixtures.test +@fixtures.test() def test_inverse (scaler): if hasattr(scaler, 'transform_inverse'): deployed = deploy_pickle("quantiletransformer", scaler) diff --git a/test/test_pathologies.py b/test/test_pathologies.py new file mode 100644 index 0000000..a2b6746 --- /dev/null +++ b/test/test_pathologies.py @@ -0,0 +1,56 @@ +import os.path +import pickle + +import numpy as np + +# PyTest testing infrastructure +import pytest + +# Local testing infrastructure +from wrap import deploy_pickle +from fixture_registry import fixtures + + +def read_file(filename): + dir = os.path.dirname(__file__) + with open(os.path.join(dir, "pathologies", filename), 'rb') as f: + return pickle.load(f) + +@fixtures.register() +def pathology_1(): + return read_file('column_with_quantile_1.pkl') + +@fixtures.register() +def pathology_2(): + return read_file('column_with_quantile_2.pkl') + +@fixtures.register() +def pathology_3(): + return read_file('column_with_quantile_3.pkl') + +@fixtures.register() +def pathology_4(): + return read_file('column_with_quantile_4.pkl') + +################################################################################ +## Real tests +@fixtures.test() +def test_forward (scaler): + n_features = scaler.n_features_in_ if hasattr(scaler, 'n_features_in_') else 20 + + deployed = deploy_pickle("quantiletransformer", scaler) + results = [] + try: + for iAttempt in range(100): + xtest = np.random.uniform (-1000,-990, n_features) + try: + py = scaler.transform (xtest[None]) + except (AttributeError,): + pytest.skip(f"Failed running the pickled pathological example. Probably generated with old sklearn.") + + c = deployed.transform (n_features, xtest) + results.append ([py[0].flatten(), c.flatten(), np.abs(py[0]-c).flatten() > 1e-5 ]) + assert np.abs(py-c).max() < 1e-4 + finally: + array = np.array(results) + print (array.T) diff --git a/test/wrap.py b/test/wrap.py index 743d472..31752d5 100644 --- a/test/wrap.py +++ b/test/wrap.py @@ -86,7 +86,7 @@ def deploy_keras (name, obj, float_t = "float"): ### Randomize UID s = string.ascii_letters uid = [s[np.random.randint(len(s))] for _ in range(16)] - tmpfile = name + ''.join(uid) + tmpfile = name + ''.join(uid) + ".keras" obj.save(tmpfile) From f00829552207bf0ef936a9d67cacd0e47439a86e Mon Sep 17 00:00:00 2001 From: Lucio Anderlini Date: Wed, 10 Sep 2025 11:56:57 +0200 Subject: [PATCH 03/15] drafted gha to run pytest --- .github/workflows/pytest.yaml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 .github/workflows/pytest.yaml diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml new file mode 100644 index 0000000..5d3a21a --- /dev/null +++ b/.github/workflows/pytest.yaml @@ -0,0 +1,25 @@ +name: Pytest Suite + +on: + pull_request: main + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: [3.7, 3.8, 3.9, 3.10, 3.11] + name: Python ${{ matrix.python-version }} + steps: + - uses: actions/checkout@v3 + - name: Setup Python + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + pip install .[keras] pytest pytest-mdreport + - name: Run tests + run: | + pytest --tb=short --maxfail=1 --durations=10 --md-report=report.md + cat report.md >> $GITHUB_STEP_SUMMARY \ No newline at end of file From 9e84c05365b9719c80534b09e0895fd29e7472bb Mon Sep 17 00:00:00 2001 From: Lucio Anderlini Date: Wed, 10 Sep 2025 11:58:38 +0200 Subject: [PATCH 04/15] fix syntax --- .github/workflows/pytest.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 5d3a21a..dcbb9fa 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -1,7 +1,9 @@ name: Pytest Suite on: - pull_request: main + pull_request: + branches: + - main jobs: test: From e9727b877a54065233265df0abf4d4361454cac8 Mon Sep 17 00:00:00 2001 From: Lucio Anderlini Date: Wed, 10 Sep 2025 11:59:37 +0200 Subject: [PATCH 05/15] drop support for python 3.7 --- .github/workflows/pytest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index dcbb9fa..186d868 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7, 3.8, 3.9, 3.10, 3.11] + python-version: [3.8, 3.9, 3.10, 3.11] name: Python ${{ matrix.python-version }} steps: - uses: actions/checkout@v3 From 0a788c283c697140e1d21587a4bba086bd2573cc Mon Sep 17 00:00:00 2001 From: Lucio Anderlini Date: Wed, 10 Sep 2025 12:01:46 +0200 Subject: [PATCH 06/15] quoted python versions to avoid 3.10 == 3.1 --- .github/workflows/pytest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 186d868..a78b96f 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -10,7 +10,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.8, 3.9, 3.10, 3.11] + python-version: ["3.8", "3.9", "3.10", "3.11"] name: Python ${{ matrix.python-version }} steps: - uses: actions/checkout@v3 From ec4038fdade2e63f8bfd29513c13b7841ad2ff34 Mon Sep 17 00:00:00 2001 From: Lucio Anderlini Date: Wed, 10 Sep 2025 12:03:01 +0200 Subject: [PATCH 07/15] typo --- .github/workflows/pytest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index a78b96f..c8a6117 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -20,7 +20,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - pip install .[keras] pytest pytest-mdreport + pip install .[keras] pytest pytest-md-report - name: Run tests run: | pytest --tb=short --maxfail=1 --durations=10 --md-report=report.md From 218e89b47ef487132f35be2435125beb5611210d Mon Sep 17 00:00:00 2001 From: Lucio Anderlini Date: Wed, 10 Sep 2025 12:07:40 +0200 Subject: [PATCH 08/15] fixed md-report syntax --- .github/workflows/pytest.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index c8a6117..df6e79f 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -9,6 +9,7 @@ jobs: test: runs-on: ubuntu-latest strategy: + fail-fast: false matrix: python-version: ["3.8", "3.9", "3.10", "3.11"] name: Python ${{ matrix.python-version }} @@ -23,5 +24,5 @@ jobs: pip install .[keras] pytest pytest-md-report - name: Run tests run: | - pytest --tb=short --maxfail=1 --durations=10 --md-report=report.md + pytest --tb=short --durations=10 --md-report --md-report-output=report.md cat report.md >> $GITHUB_STEP_SUMMARY \ No newline at end of file From 5b5ada5c34dc6548be4228f587032311e505dfe3 Mon Sep 17 00:00:00 2001 From: Lucio Anderlini Date: Wed, 10 Sep 2025 14:34:05 +0200 Subject: [PATCH 09/15] enhanced verbosity and fixed wrong import --- .github/workflows/pytest.yaml | 2 ++ test/test_Pipeline.py | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index df6e79f..62ec377 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -23,6 +23,8 @@ jobs: run: | pip install .[keras] pytest pytest-md-report - name: Run tests + env: + pytest_verbosity: 2 run: | pytest --tb=short --durations=10 --md-report --md-report-output=report.md cat report.md >> $GITHUB_STEP_SUMMARY \ No newline at end of file diff --git a/test/test_Pipeline.py b/test/test_Pipeline.py index 89716e9..51cbf98 100644 --- a/test/test_Pipeline.py +++ b/test/test_Pipeline.py @@ -1,5 +1,4 @@ import numpy as np -from rich.table import Column from sklearn.preprocessing import MinMaxScaler, StandardScaler from sklearn.pipeline import Pipeline from sklearn.preprocessing import FunctionTransformer From 728102a8e64180f05d922e2cbfe2c850ab21a36c Mon Sep 17 00:00:00 2001 From: landerlini <44908794+landerlini@users.noreply.github.com> Date: Wed, 10 Sep 2025 16:58:12 +0200 Subject: [PATCH 10/15] Improve verbosity of test report --- .github/workflows/pytest.yaml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 62ec377..14bafef 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -23,8 +23,12 @@ jobs: run: | pip install .[keras] pytest pytest-md-report - name: Run tests - env: - pytest_verbosity: 2 run: | - pytest --tb=short --durations=10 --md-report --md-report-output=report.md - cat report.md >> $GITHUB_STEP_SUMMARY \ No newline at end of file + pytest \ + --tb=short \ + --durations=10 \ + --md-report \ + --md-report-flavor gfm \ + --md-report-verbose=2 \ + --md-report-output=report.md + cat report.md >> $GITHUB_STEP_SUMMARY From a32651b9dc28fa067ddc0df0465cf3036b2bcec1 Mon Sep 17 00:00:00 2001 From: landerlini <44908794+landerlini@users.noreply.github.com> Date: Wed, 10 Sep 2025 17:20:20 +0200 Subject: [PATCH 11/15] Try including python3.6 and dropped markdown --- .github/workflows/pytest.yaml | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 14bafef..e0f5385 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -7,12 +7,21 @@ on: jobs: test: - runs-on: ubuntu-latest strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + include: + - {"version": "3.6", "os": "ubuntu-22.04"} + - {"version": "3.7", "os": "ubuntu-22.04"} + - {"version": "3.8", "os": "ubuntu-latest"} + - {"version": "3.9", "os": "ubuntu-latest"} + - {"version": "3.10", "os": "ubuntu-latest"} + - {"version": "3.11", "os": "ubuntu-latest"} + - {"version": "3.12", "os": "ubuntu-latest"} + - {"version": "3.13", "os": "ubuntu-latest"} + name: Python ${{ matrix.python-version }} + runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 - name: Setup Python @@ -25,10 +34,8 @@ jobs: - name: Run tests run: | pytest \ + -v \ --tb=short \ --durations=10 \ - --md-report \ - --md-report-flavor gfm \ - --md-report-verbose=2 \ - --md-report-output=report.md - cat report.md >> $GITHUB_STEP_SUMMARY + &> report + cat report >> $GITHUB_STEP_SUMMARY From 85d0bc21d47d8ec3c89ea9939ab2034a5230fde6 Mon Sep 17 00:00:00 2001 From: landerlini <44908794+landerlini@users.noreply.github.com> Date: Thu, 11 Sep 2025 08:42:32 +0200 Subject: [PATCH 12/15] Try dariocurr/pytest-summary --- .github/workflows/pytest.yaml | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index e0f5385..6d8106c 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -20,22 +20,28 @@ jobs: - {"version": "3.12", "os": "ubuntu-latest"} - {"version": "3.13", "os": "ubuntu-latest"} - name: Python ${{ matrix.python-version }} + name: Python ${{ matrix.version }} on ${{ matrix.os }} runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v3 - name: Setup Python uses: actions/setup-python@v4 with: - python-version: ${{ matrix.python-version }} + python-version: ${{ matrix.version }} - name: Install dependencies run: | pip install .[keras] pytest pytest-md-report - name: Run tests - run: | - pytest \ - -v \ - --tb=short \ - --durations=10 \ - &> report - cat report >> $GITHUB_STEP_SUMMARY + uses: dariocurr/pytest-summary@main + with: + options: -v --durations=10 --tb=short + + + # - name: Run tests + # run: | + # pytest \ + # -v \ + # --tb=short \ + # --durations=10 \ + # &> report + # cat report >> $GITHUB_STEP_SUMMARY From 345d8628cec2a55004ad55194dcdb9f0f36badbb Mon Sep 17 00:00:00 2001 From: landerlini <44908794+landerlini@users.noreply.github.com> Date: Thu, 11 Sep 2025 08:46:00 +0200 Subject: [PATCH 13/15] python3.6 moved to ubuntu 20 --- .github/workflows/pytest.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index 6d8106c..d27cb56 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -11,7 +11,7 @@ jobs: fail-fast: false matrix: include: - - {"version": "3.6", "os": "ubuntu-22.04"} + - {"version": "3.6", "os": "ubuntu-20.04"} - {"version": "3.7", "os": "ubuntu-22.04"} - {"version": "3.8", "os": "ubuntu-latest"} - {"version": "3.9", "os": "ubuntu-latest"} From 7f5c90a693f8217e278c1cd82928a162cd37efdd Mon Sep 17 00:00:00 2001 From: landerlini <44908794+landerlini@users.noreply.github.com> Date: Thu, 11 Sep 2025 08:50:58 +0200 Subject: [PATCH 14/15] dropped py 3.6 for no runners --- .github/workflows/pytest.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index d27cb56..ab7c037 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -11,7 +11,7 @@ jobs: fail-fast: false matrix: include: - - {"version": "3.6", "os": "ubuntu-20.04"} + # - {"version": "3.6", "os": "ubuntu-20.04"} # EOL: 2021-12-23 - {"version": "3.7", "os": "ubuntu-22.04"} - {"version": "3.8", "os": "ubuntu-latest"} - {"version": "3.9", "os": "ubuntu-latest"} @@ -34,7 +34,7 @@ jobs: - name: Run tests uses: dariocurr/pytest-summary@main with: - options: -v --durations=10 --tb=short + options: -v --durations=10 --tb=short test/ # - name: Run tests From 405970cde2f4cef992bd9f2cff5b0d54d8f6c034 Mon Sep 17 00:00:00 2001 From: landerlini <44908794+landerlini@users.noreply.github.com> Date: Thu, 11 Sep 2025 08:53:20 +0200 Subject: [PATCH 15/15] fixed test dir path --- .github/workflows/pytest.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pytest.yaml b/.github/workflows/pytest.yaml index ab7c037..a6c5d6b 100644 --- a/.github/workflows/pytest.yaml +++ b/.github/workflows/pytest.yaml @@ -34,7 +34,8 @@ jobs: - name: Run tests uses: dariocurr/pytest-summary@main with: - options: -v --durations=10 --tb=short test/ + options: -v --durations=10 --tb=short + paths: test/ # - name: Run tests