diff --git a/gext/fitting.py b/gext/fitting.py index fc5224fe8522809cf15552d4fd4a9f7f0a426525..375328b0a9faa6b3d02cdd71092bd504eff469ec 100644 --- a/gext/fitting.py +++ b/gext/fitting.py @@ -41,6 +41,49 @@ class AbstractFitting(abc.ABC): result += vector*coeff return result +class DiffFitting(AbstractFitting): + + """least square minimization fitting of the diffvectors""" + + supported_options = { + "regularization": 0.0, + } + + def set_options(self, **kwargs): + """Set options for least square minimization""" + super().set_options(**kwargs) + + if self.options["regularization"] < 0 \ + or self.options["regularization"] > 100: + raise ValueError("Unsupported value for regularization") + + def fit(self, vectors: List[np.ndarray], target: np.ndarray): + """Given a set of vectors and a target return the fitting + coefficients.""" + if len(vectors) == 1: + raise ValueError("DiffFitting does not work for one vector") + + ref = len(vectors) - 1 + target = target - vectors[ref] + + diff_vectors = [] + for i in range(0, len(vectors)): + if i != ref: + diff_vectors.append(vectors[i] - vectors[ref]) + + matrix = np.array(diff_vectors).T + a = matrix.T @ matrix + b = matrix.T @ target + if self.options["regularization"] > 0.0: + a += np.identity(len(b))*self.options["regularization"]**2 + coefficients = np.linalg.solve(a, b) + + # Convert diff coefficients to normal coefficients (like in + # least square fitting) + coefficients = np.insert(coefficients, ref, 1.0 - np.sum(coefficients)) + + return coefficients + class LeastSquare(AbstractFitting): """Simple least square minimization fitting.""" @@ -122,49 +165,19 @@ class PolynomialRegression(AbstractFitting): supported_options = { "regularization": 1e-3, - "minorder": 1, - "maxorder": 1, - "outerprod": False} + "ref": -1, + "order": 1} def set_options(self, **kwargs): - """Set options for quasi time reversible fitting""" super().set_options(**kwargs) if self.options["regularization"] < 0 \ or self.options["regularization"] > 100: raise ValueError("Unsupported value for regularization") - if self.options["minorder"] < 0 or self.options["minorder"] > 3: - raise ValueError("minorder must be >= 0 and <= 3") - if self.options["minorder"] > self.options["maxorder"]: - raise ValueError("minorder must be <= maxorder") - if self.options["maxorder"] > 3: - raise ValueError("maxorder must be <= 3") - self.matrix = np.zeros(0, dtype=np.float64) self.gamma_shape = None - def get_orders(self, descriptors): - orders = [] - if 0 >= self.options["minorder"] and 0 <= self.options["maxorder"]: - if len(descriptors.shape) > 1: - orders.append(np.ones((descriptors.shape[0], 1))) - else: - orders.append(np.ones(1)) - if 1 >= self.options["minorder"] and 1 <= self.options["maxorder"]: - orders.append(descriptors) - if 2 >= self.options["minorder"] and 2 <= self.options["maxorder"]: - if self.options["outerprod"]: - orders.append(np.array([np.outer(d, d).flatten() for d in descriptors])) - else: - orders.append(descriptors**2) - if 3 >= self.options["minorder"] and 3 <= self.options["maxorder"]: - orders.append(descriptors**3) - if len(orders) > 1: - return np.hstack(orders) - else: - return orders[0] - def fit(self, descriptor_list: List[np.ndarray], gamma_list: List[np.ndarray]): """Given a set of vectors and a set of gammas, construct the transformation matrix.""" @@ -172,20 +185,49 @@ class PolynomialRegression(AbstractFitting): if self.gamma_shape is None: self.gamma_shape = gamma_list[0].shape - descriptors = np.array(descriptor_list, dtype=np.float64) - gammas = np.reshape(gamma_list, - (len(gamma_list), self.gamma_shape[0]*self.gamma_shape[1])) + if self.options["ref"] is not None: - vander = self.get_orders(descriptors) - a = vander.T @ vander - b = vander.T @ gammas - if self.options["regularization"] > 0.0: - a += np.identity(len(b))*self.options["regularization"]**2 + if len(descriptor_list) == 1: + raise ValueError("Ref does not work for one data point") + + ref = self.options["ref"] + if ref < 0: + ref += len(descriptor_list) + + self.descriptor_ref = descriptor_list[ref] + self.gamma_ref = gamma_list[ref].flatten() - self.matrix = np.linalg.solve(a, b) + descriptors = [] + gammas = [] + for i in range(len(gamma_list)): + if i == ref: + continue + descriptors.append( + descriptor_list[i] - self.descriptor_ref) + gammas.append( + gamma_list[i].flatten() - self.gamma_ref) + + d = np.array(descriptors, dtype=np.float64).T + gammas = np.array(gammas, dtype=np.float64).T + else: + d = np.array(descriptor_list, dtype=np.float64).T + gammas = np.reshape(gamma_list, + (len(gamma_list), self.gamma_shape[0]*self.gamma_shape[1])).T + + if self.options["order"] >= 2: + d = np.vstack((d, d**2)) + + a = d @ np.linalg.inv(d.T @ d + np.identity(d.shape[1])*self.options["regularization"]**2) + self.matrix = a @ gammas.T def apply(self, descriptor): """Apply the matrix to the current descriptor.""" - - gamma = self.get_orders(np.array([descriptor])) @ self.matrix + if self.options["ref"]: + descriptor -= self.descriptor_ref + descriptor = np.array([descriptor]) + if self.options["order"] >= 2: + descriptor = np.concatenate([descriptor, descriptor**2], axis=1) + gamma = descriptor @ self.matrix + if self.options["ref"]: + gamma += self.gamma_ref return np.reshape(gamma, self.gamma_shape) diff --git a/gext/main.py b/gext/main.py index a9b2b430e8a452f84b5906c8107751c8f2be8ba6..ab0f781d86bc240df97263617437bd146f6ea99e 100644 --- a/gext/main.py +++ b/gext/main.py @@ -4,7 +4,7 @@ from typing import Optional import numpy as np from . import grassmann -from .fitting import LeastSquare, QuasiTimeReversible, PolynomialRegression +from .fitting import LeastSquare, QuasiTimeReversible, PolynomialRegression, DiffFitting from .descriptors import Distance, Coulomb from .buffer import CircularBuffer @@ -34,7 +34,7 @@ class Extrapolator: self.natoms = natoms self.set_options(**kwargs) - self.gammas = CircularBuffer(self.options["nsteps"], + self.coefficients = CircularBuffer(self.options["nsteps"], (self.nelectrons//2, self.nbasis)) self.descriptors = CircularBuffer(self.options["nsteps"], ((self.natoms - 1)*self.natoms//2, )) @@ -84,6 +84,8 @@ class Extrapolator: if self.options["fitting"] == "leastsquare": self.fitting_calculator = LeastSquare() + elif self.options["fitting"] == "diff": + self.fitting_calculator = DiffFitting() elif self.options["fitting"] == "qtr": self.fitting_calculator = QuasiTimeReversible() elif self.options["fitting"] == "polynomialregression": @@ -100,12 +102,7 @@ class Extrapolator: coeff = self._crop_coeff(coeff) coeff = self._normalize(coeff, overlap) - # if it is the first time we load data, set the tangent point - if self.tangent is None: - self._set_tangent(coeff) - - # push the new data to the corresponding vectors - self.gammas.push(self._grassmann_log(coeff)) + self.coefficients.push(coeff) self.descriptors.push(self._compute_descriptor(coords)) if self.options["store_overlap"]: @@ -135,7 +132,10 @@ class Extrapolator: # get the required quantities prev_descriptors = self.descriptors.get(n) - gammas = self.gammas.get(n) + + coefficients = self.coefficients.get(n) + gammas = [self._grassmann_log(c) for c in coefficients] + descriptor = self._compute_descriptor(coords) # use the descriptors to find the fitting coefficients @@ -171,9 +171,14 @@ class Extrapolator: def _get_tangent(self) -> np.ndarray: """Get the tangent point.""" - if self.tangent is not None: - return self.tangent - raise ValueError("Tangent point is not set.") + count = self.descriptors.count + n = min(self.options["nsteps"], count) + if "ref" in self.fitting_calculator.options: + ref = self.fitting_calculator.options["ref"] + else: + ref = 0 + coefficients = self.coefficients.get(n) + return coefficients[ref] def _crop_coeff(self, coeff) -> np.ndarray: """Crop the coefficient matrix to remove the virtual orbitals."""