diff --git a/gext/fitting.py b/gext/fitting.py index efd09f08e317ccd7e962b3dc7596ef2a9e1ab927..21fa5a36b2ce4655b5aeba527433cd653007f9e8 100644 --- a/gext/fitting.py +++ b/gext/fitting.py @@ -64,7 +64,7 @@ class LeastSquare(AbstractFitting): a = matrix.T @ matrix b = matrix.T @ target if self.options["regularization"] > 0.0: - a += np.identity(len(b))*self.options["regularization"] + a += np.identity(len(b))*self.options["regularization"]**2 coefficients = np.linalg.solve(a, b) return np.array(coefficients, dtype=np.float64) @@ -103,7 +103,7 @@ class QuasiTimeReversible(AbstractFitting): b = time_reversible_matrix.T @ (target + past_target) if self.options["regularization"] > 0.0: - a += np.identity(len(b))*self.options["regularization"] + a += np.identity(len(b))*self.options["regularization"]**2 coefficients = np.linalg.solve(a, b) if q == 1: diff --git a/tests/test_descriptor_fitting.py b/tests/test_descriptor_fitting.py index f0ae573404705b11cfa65cb87d50facdfae97282..d0c78a000e26c43a0f8e8abc53956d31d0b41aef 100644 --- a/tests/test_descriptor_fitting.py +++ b/tests/test_descriptor_fitting.py @@ -129,3 +129,71 @@ def test_time_reversibility(datafile): # check the time reversibility assert np.linalg.norm(fitted_target - fitted_target_reverse, ord=np.inf) < SMALL + +@pytest.mark.parametrize("datafile", ["urea.json", "glucose.json"]) +@pytest.mark.parametrize("regularization", [0.001, 0.005, 0.01, 0.05, 0.1]) +def test_square_qtr_formulation(datafile, regularization): + + # load test data from json file + data = utils.load_json(f"tests/{datafile}") + nelectrons = data["nelectrons"] + natoms = data["trajectory"].shape[1] + nbasis = data["overlaps"].shape[1] + nframes = data["trajectory"].shape[0] + + # initialize an extrapolator + extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms, + nsteps=nframes, fitting="qtr", fitting_regularization=regularization) + + # load data in the extrapolator + for (coords, coeff, overlap) in zip(data["trajectory"], + data["coefficients"], data["overlaps"]): + extrapolator.load_data(coords, coeff, overlap) + + descriptors = extrapolator.descriptors.get(10) + target = descriptors[-1] + + fitting_calculator = extrapolator.fitting_calculator + alt_fitting_calculator = utils.AlternativeQuasiTimeReversible() + alt_fitting_calculator.set_options(regularization=regularization) + + # check if the two fittings give the same coefficients + for start in range(0, 8): + vectors = descriptors[start:-1] + fit_coefficients = fitting_calculator.fit(vectors, target) + alt_fit_coefficients = alt_fitting_calculator.fit(vectors, target) + assert np.linalg.norm(fit_coefficients - alt_fit_coefficients, ord=np.inf) < 1e-4 + +@pytest.mark.parametrize("datafile", ["urea.json", "glucose.json"]) +@pytest.mark.parametrize("regularization", [0.001, 0.005, 0.01, 0.05, 0.1]) +def test_square_ls_formulation(datafile, regularization): + + # load test data from json file + data = utils.load_json(f"tests/{datafile}") + nelectrons = data["nelectrons"] + natoms = data["trajectory"].shape[1] + nbasis = data["overlaps"].shape[1] + nframes = data["trajectory"].shape[0] + + # initialize an extrapolator + extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms, + nsteps=nframes, fitting="leastsquare", fitting_regularization=regularization) + + # load data in the extrapolator + for (coords, coeff, overlap) in zip(data["trajectory"], + data["coefficients"], data["overlaps"]): + extrapolator.load_data(coords, coeff, overlap) + + descriptors = extrapolator.descriptors.get(10) + target = descriptors[-1] + + fitting_calculator = extrapolator.fitting_calculator + alt_fitting_calculator = utils.AlternativeLeastSquare() + alt_fitting_calculator.set_options(regularization=regularization) + + # check if the two fittings give the same coefficients + for start in range(0, 8): + vectors = descriptors[start:-1] + fit_coefficients = fitting_calculator.fit(vectors, target) + alt_fit_coefficients = alt_fitting_calculator.fit(vectors, target) + assert np.linalg.norm(fit_coefficients - alt_fit_coefficients, ord=np.inf) < 1e-4 diff --git a/tests/utils.py b/tests/utils.py index 80ba090c3f9d29abde70c2f92abee66bd1b7361e..fdd02055b585b6bfb47229c73fd2f8851ce1d3b8 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1,5 +1,11 @@ +import os +import sys import json import numpy as np +from typing import List + +sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))) +from gext.fitting import LeastSquare, QuasiTimeReversible def load_json(path): with open(path, "r") as json_file: @@ -12,3 +18,52 @@ def load_json(path): else: data[key] = np.array(value) return data + +class AlternativeLeastSquare(LeastSquare): + + """Alternative least square minimization fitting.""" + + def fit(self, vectors: List[np.ndarray], target: np.ndarray): + """Given a set of vectors and a target return the fitting + coefficients.""" + matrix = np.array(vectors).T + a = np.vstack((matrix, np.identity(len(vectors))*self.options["regularization"])) + b = np.concatenate((target, np.zeros(len(vectors)))) + coefficients, _, _, _ = np.linalg.lstsq(a, b, rcond=-1) + return np.array(coefficients, dtype=np.float64) + +class AlternativeQuasiTimeReversible(QuasiTimeReversible): + + """Quasi time reversible fitting scheme.""" + + def fit(self, vectors: List[np.ndarray], target: np.ndarray): + """Given a set of vectors and a target return the fitting + coefficients in a quasi time reversible scheme.""" + + past_target = vectors[0] + matrix = np.array(vectors[1:]).T + + q = matrix.shape[1] + if q == 1: + time_reversible_matrix = matrix + elif q%2 == 0: + time_reversible_matrix = matrix[:, :q//2] + matrix[:, :q//2-1:-1] + else: + time_reversible_matrix = matrix[:, :q//2+1] + matrix[:, :q//2-1:-1] + + a = np.vstack((time_reversible_matrix, + np.identity(time_reversible_matrix.shape[1])*self.options["regularization"])) + b = np.concatenate((target + past_target, np.zeros((time_reversible_matrix.shape[1])))) + + coefficients, _, _, _ = np.linalg.lstsq(a, b, rcond=-1) + + + if q == 1: + full_coefficients = np.concatenate(([-1.0], coefficients)) + elif q%2 == 0: + full_coefficients = np.concatenate(([-1.0], coefficients, + coefficients[::-1])) + else: + full_coefficients = np.concatenate(([-1.0], coefficients[:-1], + 2.0*coefficients[-1:], coefficients[-2::-1])) + return np.array(full_coefficients, dtype=np.float64)