From 7bfdb65d892b3e292f9066d1afc6419521da20b0 Mon Sep 17 00:00:00 2001
From: Michele Nottoli <michele.nottoli@gmail.com>
Date: Thu, 9 Nov 2023 11:42:50 +0100
Subject: [PATCH] Changed default for regularization, added a test.

---
 gext/fitting.py                  |  4 +-
 tests/test_descriptor_fitting.py | 68 ++++++++++++++++++++++++++++++++
 tests/utils.py                   | 55 ++++++++++++++++++++++++++
 3 files changed, 125 insertions(+), 2 deletions(-)

diff --git a/gext/fitting.py b/gext/fitting.py
index efd09f0..21fa5a3 100644
--- a/gext/fitting.py
+++ b/gext/fitting.py
@@ -64,7 +64,7 @@ class LeastSquare(AbstractFitting):
         a = matrix.T @ matrix
         b = matrix.T @ target
         if self.options["regularization"] > 0.0:
-            a += np.identity(len(b))*self.options["regularization"]
+            a += np.identity(len(b))*self.options["regularization"]**2
         coefficients = np.linalg.solve(a, b)
         return np.array(coefficients, dtype=np.float64)
 
@@ -103,7 +103,7 @@ class QuasiTimeReversible(AbstractFitting):
         b = time_reversible_matrix.T @ (target + past_target)
 
         if self.options["regularization"] > 0.0:
-            a += np.identity(len(b))*self.options["regularization"]
+            a += np.identity(len(b))*self.options["regularization"]**2
         coefficients = np.linalg.solve(a, b)
 
         if q == 1:
diff --git a/tests/test_descriptor_fitting.py b/tests/test_descriptor_fitting.py
index f0ae573..d0c78a0 100644
--- a/tests/test_descriptor_fitting.py
+++ b/tests/test_descriptor_fitting.py
@@ -129,3 +129,71 @@ def test_time_reversibility(datafile):
 
     # check the time reversibility
     assert np.linalg.norm(fitted_target - fitted_target_reverse, ord=np.inf) < SMALL
+
+@pytest.mark.parametrize("datafile", ["urea.json", "glucose.json"])
+@pytest.mark.parametrize("regularization", [0.001, 0.005, 0.01, 0.05, 0.1])
+def test_square_qtr_formulation(datafile, regularization):
+
+    # load test data from json file
+    data = utils.load_json(f"tests/{datafile}")
+    nelectrons = data["nelectrons"]
+    natoms = data["trajectory"].shape[1]
+    nbasis = data["overlaps"].shape[1]
+    nframes = data["trajectory"].shape[0]
+
+    # initialize an extrapolator
+    extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms,
+        nsteps=nframes, fitting="qtr", fitting_regularization=regularization)
+
+    # load data in the extrapolator
+    for (coords, coeff, overlap) in zip(data["trajectory"],
+            data["coefficients"], data["overlaps"]):
+        extrapolator.load_data(coords, coeff, overlap)
+
+    descriptors = extrapolator.descriptors.get(10)
+    target = descriptors[-1]
+
+    fitting_calculator = extrapolator.fitting_calculator
+    alt_fitting_calculator = utils.AlternativeQuasiTimeReversible()
+    alt_fitting_calculator.set_options(regularization=regularization)
+
+    # check if the two fittings give the same coefficients
+    for start in range(0, 8):
+        vectors = descriptors[start:-1]
+        fit_coefficients = fitting_calculator.fit(vectors, target)
+        alt_fit_coefficients = alt_fitting_calculator.fit(vectors, target)
+        assert np.linalg.norm(fit_coefficients - alt_fit_coefficients, ord=np.inf) < 1e-4
+
+@pytest.mark.parametrize("datafile", ["urea.json", "glucose.json"])
+@pytest.mark.parametrize("regularization", [0.001, 0.005, 0.01, 0.05, 0.1])
+def test_square_ls_formulation(datafile, regularization):
+
+    # load test data from json file
+    data = utils.load_json(f"tests/{datafile}")
+    nelectrons = data["nelectrons"]
+    natoms = data["trajectory"].shape[1]
+    nbasis = data["overlaps"].shape[1]
+    nframes = data["trajectory"].shape[0]
+
+    # initialize an extrapolator
+    extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms,
+        nsteps=nframes, fitting="leastsquare", fitting_regularization=regularization)
+
+    # load data in the extrapolator
+    for (coords, coeff, overlap) in zip(data["trajectory"],
+            data["coefficients"], data["overlaps"]):
+        extrapolator.load_data(coords, coeff, overlap)
+
+    descriptors = extrapolator.descriptors.get(10)
+    target = descriptors[-1]
+
+    fitting_calculator = extrapolator.fitting_calculator
+    alt_fitting_calculator = utils.AlternativeLeastSquare()
+    alt_fitting_calculator.set_options(regularization=regularization)
+
+    # check if the two fittings give the same coefficients
+    for start in range(0, 8):
+        vectors = descriptors[start:-1]
+        fit_coefficients = fitting_calculator.fit(vectors, target)
+        alt_fit_coefficients = alt_fitting_calculator.fit(vectors, target)
+        assert np.linalg.norm(fit_coefficients - alt_fit_coefficients, ord=np.inf) < 1e-4
diff --git a/tests/utils.py b/tests/utils.py
index 80ba090..fdd0205 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -1,5 +1,11 @@
+import os
+import sys
 import json
 import numpy as np
+from typing import List
+
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
+from gext.fitting import LeastSquare, QuasiTimeReversible
 
 def load_json(path):
     with open(path, "r") as json_file:
@@ -12,3 +18,52 @@ def load_json(path):
             else:
                 data[key] = np.array(value)
     return data
+
+class AlternativeLeastSquare(LeastSquare):
+
+    """Alternative least square minimization fitting."""
+
+    def fit(self, vectors: List[np.ndarray], target: np.ndarray):
+        """Given a set of vectors and a target return the fitting
+        coefficients."""
+        matrix = np.array(vectors).T
+        a = np.vstack((matrix, np.identity(len(vectors))*self.options["regularization"]))
+        b = np.concatenate((target, np.zeros(len(vectors))))
+        coefficients, _, _, _ = np.linalg.lstsq(a, b, rcond=-1)
+        return np.array(coefficients, dtype=np.float64)
+
+class AlternativeQuasiTimeReversible(QuasiTimeReversible):
+
+    """Quasi time reversible fitting scheme."""
+
+    def fit(self, vectors: List[np.ndarray], target: np.ndarray):
+        """Given a set of vectors and a target return the fitting
+        coefficients in a quasi time reversible scheme."""
+
+        past_target = vectors[0]
+        matrix = np.array(vectors[1:]).T
+
+        q = matrix.shape[1]
+        if q == 1:
+            time_reversible_matrix = matrix
+        elif q%2 == 0:
+            time_reversible_matrix = matrix[:, :q//2] + matrix[:, :q//2-1:-1]
+        else:
+            time_reversible_matrix = matrix[:, :q//2+1] + matrix[:, :q//2-1:-1]
+
+        a = np.vstack((time_reversible_matrix,
+            np.identity(time_reversible_matrix.shape[1])*self.options["regularization"]))
+        b = np.concatenate((target + past_target, np.zeros((time_reversible_matrix.shape[1]))))
+
+        coefficients, _, _, _ = np.linalg.lstsq(a, b, rcond=-1)
+
+
+        if q == 1:
+            full_coefficients = np.concatenate(([-1.0], coefficients))
+        elif q%2 == 0:
+            full_coefficients = np.concatenate(([-1.0], coefficients,
+                coefficients[::-1]))
+        else:
+            full_coefficients = np.concatenate(([-1.0], coefficients[:-1],
+                2.0*coefficients[-1:], coefficients[-2::-1]))
+        return np.array(full_coefficients, dtype=np.float64)
-- 
GitLab