From f08e50f558a0a0f5e823876c3502ed6b06864fb7 Mon Sep 17 00:00:00 2001
From: Michele Nottoli <michele.nottoli@gmail.com>
Date: Tue, 2 Jul 2024 14:54:05 +0200
Subject: [PATCH] Update.

---
 gext/fitting.py | 128 ++++++++++++++++++++++++++++++++----------------
 gext/main.py    |  29 ++++++-----
 2 files changed, 102 insertions(+), 55 deletions(-)

diff --git a/gext/fitting.py b/gext/fitting.py
index fc5224f..375328b 100644
--- a/gext/fitting.py
+++ b/gext/fitting.py
@@ -41,6 +41,49 @@ class AbstractFitting(abc.ABC):
             result += vector*coeff
         return result
 
+class DiffFitting(AbstractFitting):
+
+    """least square minimization fitting of the diffvectors"""
+
+    supported_options = {
+        "regularization": 0.0,
+    }
+
+    def set_options(self, **kwargs):
+        """Set options for least square minimization"""
+        super().set_options(**kwargs)
+
+        if self.options["regularization"] < 0 \
+                or self.options["regularization"] > 100:
+            raise ValueError("Unsupported value for regularization")
+
+    def fit(self, vectors: List[np.ndarray], target: np.ndarray):
+        """Given a set of vectors and a target return the fitting
+        coefficients."""
+        if len(vectors) == 1:
+            raise ValueError("DiffFitting does not work for one vector")
+
+        ref = len(vectors) - 1
+        target = target - vectors[ref]
+
+        diff_vectors = []
+        for i in range(0, len(vectors)):
+            if i != ref:
+                diff_vectors.append(vectors[i] - vectors[ref])
+
+        matrix = np.array(diff_vectors).T
+        a = matrix.T @ matrix
+        b = matrix.T @ target
+        if self.options["regularization"] > 0.0:
+            a += np.identity(len(b))*self.options["regularization"]**2
+        coefficients = np.linalg.solve(a, b)
+
+        # Convert diff coefficients to normal coefficients (like in
+        # least square fitting)
+        coefficients = np.insert(coefficients, ref, 1.0 - np.sum(coefficients))
+
+        return coefficients
+
 class LeastSquare(AbstractFitting):
 
     """Simple least square minimization fitting."""
@@ -122,49 +165,19 @@ class PolynomialRegression(AbstractFitting):
 
     supported_options = {
         "regularization": 1e-3,
-        "minorder": 1,
-        "maxorder": 1,
-        "outerprod": False}
+        "ref": -1,
+        "order": 1}
 
     def set_options(self, **kwargs):
-        """Set options for quasi time reversible fitting"""
         super().set_options(**kwargs)
 
         if self.options["regularization"] < 0 \
                 or self.options["regularization"] > 100:
             raise ValueError("Unsupported value for regularization")
 
-        if self.options["minorder"] < 0 or self.options["minorder"] > 3:
-            raise ValueError("minorder must be >= 0 and <= 3")
-        if self.options["minorder"] > self.options["maxorder"]:
-            raise ValueError("minorder must be <= maxorder")
-        if self.options["maxorder"] > 3:
-            raise ValueError("maxorder must be <= 3")
-
         self.matrix = np.zeros(0, dtype=np.float64)
         self.gamma_shape = None
 
-    def get_orders(self, descriptors):
-        orders = []
-        if 0 >= self.options["minorder"] and 0 <= self.options["maxorder"]:
-            if len(descriptors.shape) > 1:
-                orders.append(np.ones((descriptors.shape[0], 1)))
-            else:
-                orders.append(np.ones(1))
-        if 1 >= self.options["minorder"] and 1 <= self.options["maxorder"]:
-            orders.append(descriptors)
-        if 2 >= self.options["minorder"] and 2 <= self.options["maxorder"]:
-            if self.options["outerprod"]:
-                orders.append(np.array([np.outer(d, d).flatten() for d in descriptors]))
-            else:
-                orders.append(descriptors**2)
-        if 3 >= self.options["minorder"] and 3 <= self.options["maxorder"]:
-            orders.append(descriptors**3)
-        if len(orders) > 1:
-            return np.hstack(orders)
-        else:
-            return orders[0]
-
     def fit(self, descriptor_list: List[np.ndarray], gamma_list: List[np.ndarray]):
         """Given a set of vectors and a set of gammas, construct the
         transformation matrix."""
@@ -172,20 +185,49 @@ class PolynomialRegression(AbstractFitting):
         if self.gamma_shape is None:
             self.gamma_shape = gamma_list[0].shape
 
-        descriptors = np.array(descriptor_list, dtype=np.float64)
-        gammas = np.reshape(gamma_list,
-            (len(gamma_list), self.gamma_shape[0]*self.gamma_shape[1]))
+        if self.options["ref"] is not None:
 
-        vander = self.get_orders(descriptors)
-        a = vander.T @ vander
-        b = vander.T @ gammas
-        if self.options["regularization"] > 0.0:
-            a += np.identity(len(b))*self.options["regularization"]**2
+            if len(descriptor_list) == 1:
+                raise ValueError("Ref does not work for one data point")
+
+            ref = self.options["ref"]
+            if ref < 0:
+                ref += len(descriptor_list)
+
+            self.descriptor_ref = descriptor_list[ref]
+            self.gamma_ref = gamma_list[ref].flatten()
 
-        self.matrix = np.linalg.solve(a, b)
+            descriptors = []
+            gammas = []
+            for i in range(len(gamma_list)):
+                if i == ref:
+                    continue
+                descriptors.append(
+                    descriptor_list[i] - self.descriptor_ref)
+                gammas.append(
+                    gamma_list[i].flatten() - self.gamma_ref)
+
+            d = np.array(descriptors, dtype=np.float64).T
+            gammas = np.array(gammas, dtype=np.float64).T
+        else:
+            d = np.array(descriptor_list, dtype=np.float64).T
+            gammas = np.reshape(gamma_list,
+                (len(gamma_list), self.gamma_shape[0]*self.gamma_shape[1])).T
+
+        if self.options["order"] >= 2:
+            d = np.vstack((d, d**2))
+
+        a = d @ np.linalg.inv(d.T @ d + np.identity(d.shape[1])*self.options["regularization"]**2)
+        self.matrix = a @ gammas.T
 
     def apply(self, descriptor):
         """Apply the matrix to the current descriptor."""
-
-        gamma = self.get_orders(np.array([descriptor])) @ self.matrix
+        if self.options["ref"]:
+            descriptor -= self.descriptor_ref
+        descriptor = np.array([descriptor])
+        if self.options["order"] >= 2:
+            descriptor = np.concatenate([descriptor, descriptor**2], axis=1)
+        gamma = descriptor @ self.matrix
+        if self.options["ref"]:
+            gamma += self.gamma_ref
         return np.reshape(gamma, self.gamma_shape)
diff --git a/gext/main.py b/gext/main.py
index a9b2b43..ab0f781 100644
--- a/gext/main.py
+++ b/gext/main.py
@@ -4,7 +4,7 @@ from typing import Optional
 import numpy as np
 
 from . import grassmann
-from .fitting import LeastSquare, QuasiTimeReversible, PolynomialRegression
+from .fitting import LeastSquare, QuasiTimeReversible, PolynomialRegression, DiffFitting
 from .descriptors import Distance, Coulomb
 from .buffer import CircularBuffer
 
@@ -34,7 +34,7 @@ class Extrapolator:
         self.natoms = natoms
         self.set_options(**kwargs)
 
-        self.gammas = CircularBuffer(self.options["nsteps"],
+        self.coefficients = CircularBuffer(self.options["nsteps"],
             (self.nelectrons//2, self.nbasis))
         self.descriptors = CircularBuffer(self.options["nsteps"],
             ((self.natoms - 1)*self.natoms//2, ))
@@ -84,6 +84,8 @@ class Extrapolator:
 
         if self.options["fitting"] == "leastsquare":
             self.fitting_calculator = LeastSquare()
+        elif self.options["fitting"] == "diff":
+            self.fitting_calculator = DiffFitting()
         elif self.options["fitting"] == "qtr":
             self.fitting_calculator = QuasiTimeReversible()
         elif self.options["fitting"] == "polynomialregression":
@@ -100,12 +102,7 @@ class Extrapolator:
         coeff = self._crop_coeff(coeff)
         coeff = self._normalize(coeff, overlap)
 
-        # if it is the first time we load data, set the tangent point
-        if self.tangent is None:
-            self._set_tangent(coeff)
-
-        # push the new data to the corresponding vectors
-        self.gammas.push(self._grassmann_log(coeff))
+        self.coefficients.push(coeff)
         self.descriptors.push(self._compute_descriptor(coords))
 
         if self.options["store_overlap"]:
@@ -135,7 +132,10 @@ class Extrapolator:
 
         # get the required quantities
         prev_descriptors = self.descriptors.get(n)
-        gammas = self.gammas.get(n)
+
+        coefficients = self.coefficients.get(n)
+        gammas = [self._grassmann_log(c) for c in coefficients]
+
         descriptor = self._compute_descriptor(coords)
 
         # use the descriptors to find the fitting coefficients
@@ -171,9 +171,14 @@ class Extrapolator:
 
     def _get_tangent(self) -> np.ndarray:
         """Get the tangent point."""
-        if self.tangent is not None:
-            return self.tangent
-        raise ValueError("Tangent point is not set.")
+        count = self.descriptors.count
+        n = min(self.options["nsteps"], count)
+        if "ref" in self.fitting_calculator.options:
+            ref = self.fitting_calculator.options["ref"]
+        else:
+            ref = 0
+        coefficients = self.coefficients.get(n)
+        return coefficients[ref]
 
     def _crop_coeff(self, coeff) -> np.ndarray:
         """Crop the coefficient matrix to remove the virtual orbitals."""
-- 
GitLab