Update.

f08e50f5 · Michele Nottoli · b5b64258 · f08e50f5 · f08e50f5
Commit f08e50f5 authored 1 year ago by Michele Nottoli
--- a/gext/fitting.py
+++ b/gext/fitting.py
@@ -41,6 +41,49 @@ class AbstractFitting(abc.ABC):
            result += vector*coeff
        return result
+class DiffFitting(AbstractFitting):
+    """least square minimization fitting of the diffvectors"""
+    supported_options = {
+        "regularization": 0.0,
+    }
+    def set_options(self, **kwargs):
+        """Set options for least square minimization"""
+        super().set_options(**kwargs)
+        if self.options["regularization"] < 0 \
+                or self.options["regularization"] > 100:
+            raise ValueError("Unsupported value for regularization")
+    def fit(self, vectors: List[np.ndarray], target: np.ndarray):
+        """Given a set of vectors and a target return the fitting
+        coefficients."""
+        if len(vectors) == 1:
+            raise ValueError("DiffFitting does not work for one vector")
+        ref = len(vectors) - 1
+        target = target - vectors[ref]
+        diff_vectors = []
+        for i in range(0, len(vectors)):
+            if i != ref:
+                diff_vectors.append(vectors[i] - vectors[ref])
+        matrix = np.array(diff_vectors).T
+        a = matrix.T @ matrix
+        b = matrix.T @ target
+        if self.options["regularization"] > 0.0:
+            a += np.identity(len(b))*self.options["regularization"]**2
+        coefficients = np.linalg.solve(a, b)
+        # Convert diff coefficients to normal coefficients (like in
+        # least square fitting)
+        coefficients = np.insert(coefficients, ref, 1.0 - np.sum(coefficients))
+        return coefficients
 class LeastSquare(AbstractFitting):
    """Simple least square minimization fitting."""
@@ -122,49 +165,19 @@ class PolynomialRegression(AbstractFitting):
    supported_options = {
        "regularization": 1e-3,
-        "minorder": 1,
+        "ref": -1,
-        "maxorder": 1,
+        "order": 1}
-        "outerprod": False}
    def set_options(self, **kwargs):
-        """Set options for quasi time reversible fitting"""
        super().set_options(**kwargs)
        if self.options["regularization"] < 0 \
                or self.options["regularization"] > 100:
            raise ValueError("Unsupported value for regularization")
-        if self.options["minorder"] < 0 or self.options["minorder"] > 3:
-            raise ValueError("minorder must be >= 0 and <= 3")
-        if self.options["minorder"] > self.options["maxorder"]:
-            raise ValueError("minorder must be <= maxorder")
-        if self.options["maxorder"] > 3:
-            raise ValueError("maxorder must be <= 3")
        self.matrix = np.zeros(0, dtype=np.float64)
        self.gamma_shape = None
-    def get_orders(self, descriptors):
-        orders = []
-        if 0 >= self.options["minorder"] and 0 <= self.options["maxorder"]:
-            if len(descriptors.shape) > 1:
-                orders.append(np.ones((descriptors.shape[0], 1)))
-            else:
-                orders.append(np.ones(1))
-        if 1 >= self.options["minorder"] and 1 <= self.options["maxorder"]:
-            orders.append(descriptors)
-        if 2 >= self.options["minorder"] and 2 <= self.options["maxorder"]:
-            if self.options["outerprod"]:
-                orders.append(np.array([np.outer(d, d).flatten() for d in descriptors]))
-            else:
-                orders.append(descriptors**2)
-        if 3 >= self.options["minorder"] and 3 <= self.options["maxorder"]:
-            orders.append(descriptors**3)
-        if len(orders) > 1:
-            return np.hstack(orders)
-        else:
-            return orders[0]
    def fit(self, descriptor_list: List[np.ndarray], gamma_list: List[np.ndarray]):
        """Given a set of vectors and a set of gammas, construct the
        transformation matrix."""
@@ -172,20 +185,49 @@ class PolynomialRegression(AbstractFitting):
        if self.gamma_shape is None:
            self.gamma_shape = gamma_list[0].shape
-        descriptors = np.array(descriptor_list, dtype=np.float64)
+        if self.options["ref"] is not None:
-        gammas = np.reshape(gamma_list,
-            (len(gamma_list), self.gamma_shape[0]*self.gamma_shape[1]))
-        vander = self.get_orders(descriptors)
+            if len(descriptor_list) == 1:
-        a = vander.T @ vander
+                raise ValueError("Ref does not work for one data point")
-        b = vander.T @ gammas
-        if self.options["regularization"] > 0.0:
+            ref = self.options["ref"]
-            a += np.identity(len(b))*self.options["regularization"]**2
+            if ref < 0:
+                ref += len(descriptor_list)
+            self.descriptor_ref = descriptor_list[ref]
+            self.gamma_ref = gamma_list[ref].flatten()
-        self.matrix = np.linalg.solve(a, b)
+            descriptors = []
+            gammas = []
+            for i in range(len(gamma_list)):
+                if i == ref:
+                    continue
+                descriptors.append(
+                    descriptor_list[i] - self.descriptor_ref)
+                gammas.append(
+                    gamma_list[i].flatten() - self.gamma_ref)
+            d = np.array(descriptors, dtype=np.float64).T
+            gammas = np.array(gammas, dtype=np.float64).T
+        else:
+            d = np.array(descriptor_list, dtype=np.float64).T
+            gammas = np.reshape(gamma_list,
+                (len(gamma_list), self.gamma_shape[0]*self.gamma_shape[1])).T
+        if self.options["order"] >= 2:
+            d = np.vstack((d, d**2))
+        a = d @ np.linalg.inv(d.T @ d + np.identity(d.shape[1])*self.options["regularization"]**2)
+        self.matrix = a @ gammas.T
    def apply(self, descriptor):
        """Apply the matrix to the current descriptor."""
+        if self.options["ref"]:
-        gamma = self.get_orders(np.array([descriptor])) @ self.matrix
+            descriptor -= self.descriptor_ref
+        descriptor = np.array([descriptor])
+        if self.options["order"] >= 2:
+            descriptor = np.concatenate([descriptor, descriptor**2], axis=1)
+        gamma = descriptor @ self.matrix
+        if self.options["ref"]:
+            gamma += self.gamma_ref
        return np.reshape(gamma, self.gamma_shape)
--- a/gext/main.py
+++ b/gext/main.py
@@ -4,7 +4,7 @@ from typing import Optional
 import numpy as np
 from . import grassmann
-from .fitting import LeastSquare, QuasiTimeReversible, PolynomialRegression
+from .fitting import LeastSquare, QuasiTimeReversible, PolynomialRegression, DiffFitting
 from .descriptors import Distance, Coulomb
 from .buffer import CircularBuffer
@@ -34,7 +34,7 @@ class Extrapolator:
        self.natoms = natoms
        self.set_options(**kwargs)
-        self.gammas = CircularBuffer(self.options["nsteps"],
+        self.coefficients = CircularBuffer(self.options["nsteps"],
            (self.nelectrons//2, self.nbasis))
        self.descriptors = CircularBuffer(self.options["nsteps"],
            ((self.natoms - 1)*self.natoms//2, ))
@@ -84,6 +84,8 @@ class Extrapolator:
        if self.options["fitting"] == "leastsquare":
            self.fitting_calculator = LeastSquare()
+        elif self.options["fitting"] == "diff":
+            self.fitting_calculator = DiffFitting()
        elif self.options["fitting"] == "qtr":
            self.fitting_calculator = QuasiTimeReversible()
        elif self.options["fitting"] == "polynomialregression":
@@ -100,12 +102,7 @@ class Extrapolator:
        coeff = self._crop_coeff(coeff)
        coeff = self._normalize(coeff, overlap)
-        # if it is the first time we load data, set the tangent point
+        self.coefficients.push(coeff)
-        if self.tangent is None:
-            self._set_tangent(coeff)
-        # push the new data to the corresponding vectors
-        self.gammas.push(self._grassmann_log(coeff))
        self.descriptors.push(self._compute_descriptor(coords))
        if self.options["store_overlap"]:
@@ -135,7 +132,10 @@ class Extrapolator:
        # get the required quantities
        prev_descriptors = self.descriptors.get(n)
-        gammas = self.gammas.get(n)
+        coefficients = self.coefficients.get(n)
+        gammas = [self._grassmann_log(c) for c in coefficients]
        descriptor = self._compute_descriptor(coords)
        # use the descriptors to find the fitting coefficients
@@ -171,9 +171,14 @@ class Extrapolator:
    def _get_tangent(self) -> np.ndarray:
        """Get the tangent point."""
-        if self.tangent is not None:
+        count = self.descriptors.count
-            return self.tangent
+        n = min(self.options["nsteps"], count)
-        raise ValueError("Tangent point is not set.")
+        if "ref" in self.fitting_calculator.options:
+            ref = self.fitting_calculator.options["ref"]
+        else:
+            ref = 0
+        coefficients = self.coefficients.get(n)
+        return coefficients[ref]
    def _crop_coeff(self, coeff) -> np.ndarray:
        """Crop the coefficient matrix to remove the virtual orbitals."""