Compare revisions

Michele Nottoli · Michele Nottoli · a7068085 · a7068085 · a7068085 · a7068085
--- a/test_diff_fitting.py
+++ b/test_diff_fitting.py
--- a/gext/buffer.py
+++ b/gext/buffer.py
@@ -8,15 +8,19 @@ class CircularBuffer:

    """Circular buffer to store the last `n` matrices."""

-    def __init__(self, n: int, shape: Tuple[int, ...]):
+    def __init__(self, n: int):
        self.n = n
-        self.shape = shape
-        self.buffer = [np.zeros(shape, dtype=np.float64) for _ in range(n)]
+        self.buffer = None
        self.index = 0
        self.count = 0

+    def _allocate_buffer(self, shape: Tuple[int, ...]):
+        self.buffer = [np.zeros(shape, dtype=np.float64) for _ in range(self.n)]
+
    def push(self, data):
        """Add a new matrix to the buffer."""
+        if self.buffer is None:
+            self._allocate_buffer(data.shape)
        self.buffer[self.index] = data.copy()
        self.index = (self.index + 1) % self.n
        if self.count < self.n:

--- a/gext/descriptors.py
+++ b/gext/descriptors.py
@@ -3,9 +3,7 @@
 import numpy as np
 from scipy.spatial.distance import pdist

-class Distance:
-
-    """Distance matrix descriptors."""
+class BaseDescriptor:

    supported_options = {}

@@ -18,6 +16,10 @@ class Distance:
        if len(kwargs) > 0:
            raise ValueError("Invalid arguments given to the descriptor class.")

+class Distance(BaseDescriptor):
+
+    """Distance matrix descriptors."""
+
    def compute(self, coords: np.ndarray) -> np.ndarray:
        """Compute the distance matric as a descriptor."""
        return pdist(coords, metric="euclidean")
@@ -31,3 +33,16 @@ class Coulomb(Distance):
    def compute(self, coords: np.ndarray) -> np.ndarray:
        """Compute the Coulomb matrix as a descriptor."""
        return 1.0/super().compute(coords)
+
+class FlattenMatrix(BaseDescriptor):
+
+    """Use the quantity as it is, just flatten it."""
+
+    supported_options = {}
+
+    def __init__(self, **kwargs):
+        pass
+
+    def compute(self, matrix: np.ndarray) -> np.ndarray:
+        """Compute the descriptor by flattening the matrix."""
+        return matrix.flatten()
--- a/gext/fitting.py
+++ b/gext/fitting.py
@@ -117,7 +117,6 @@ class LeastSquare(AbstractFitting):
        if self.options["regularization"] > 0.0:
            a += np.identity(len(b))*self.options["regularization"]
        coefficients = np.linalg.solve(a, b)
-        print(coefficients)
        return np.array(coefficients, dtype=np.float64)

 class QuasiTimeReversible(AbstractFitting):

--- a/gext/main.py
+++ b/gext/main.py
@@ -5,7 +5,7 @@ import numpy as np

 from . import grassmann
 from .fitting import LeastSquare, QuasiTimeReversible,DiffFitting
-from .descriptors import Distance, Coulomb
+from .descriptors import Distance, Coulomb, FlattenMatrix
 from .buffer import CircularBuffer

 class Extrapolator:
@@ -35,19 +35,15 @@ class Extrapolator:
        self.natoms = natoms
        self.set_options(**kwargs)

-        self.gammas = CircularBuffer(self.options["nsteps"],
-            (self.nelectrons//2, self.nbasis))
-        self.descriptors = CircularBuffer(self.options["nsteps"],
-            ((self.natoms - 1)*self.natoms//2, ))
+        self.gammas = CircularBuffer(self.options["nsteps"])
+        self.descriptors = CircularBuffer(self.options["nsteps"])
        if self.options["store_overlap"]:
-            self.overlaps = CircularBuffer(self.options["nsteps"],
-                (self.nbasis, self.nbasis))
+            self.overlaps = CircularBuffer(self.options["nsteps"])
        if self.options["tangent"]=="one_before_last":
-            self.coeffs=CircularBuffer(self.options["nsteps"],
-                (self.nelectrons//2, self.nbasis))
+            self.coeffs = CircularBuffer(self.options["nsteps"])
        self.tangent: Optional[np.ndarray] = None
-        self.H_cores=CircularBuffer(self.options["nsteps"],
-                (self.nbasis, self.nbasis))
+        self.H_cores = CircularBuffer(self.options["nsteps"])
+
    def set_options(self, **kwargs):
        """Given an arbitrary amount of keyword arguments, parse them if
        specified, set default values if not specified and raise an error
@@ -83,14 +79,14 @@ class Extrapolator:
        elif self.options["descriptor"] == "coulomb":
            self.descriptor_calculator = Coulomb()
        elif self.options["descriptor"] == "H_core":
-            pass
+            self.descriptor_calculator = FlattenMatrix()
        else:
            raise ValueError("Unsupported descriptor")
-        if self.options["descriptor"] is not "H_core":
+
        self.descriptor_calculator.set_options(**descriptor_options)

        if self.options["fitting"] == "leastsquare":
-            eelf.fitting_calculator = LeastSquare()
+            self.fitting_calculator = LeastSquare()
        elif self.options["fitting"] == "diff":
            self.fitting_calculator = DiffFitting()
        elif self.options["fitting"] == "qtr":
@@ -99,7 +95,7 @@ class Extrapolator:
            raise ValueError("Unsupported fitting")
        self.fitting_calculator.set_options(**fitting_options)

-    def load_data(self, H_core: np.ndarray, coeff: np.ndarray, overlap):
+    def load_data(self, descriptor_input: np.ndarray, coeff: np.ndarray, overlap):
        """Load a new data point in the extrapolator."""

        # Crop the coefficient matrix up to the number of electron
@@ -113,11 +109,8 @@ class Extrapolator:
        # if it is the first time we load data, set the tangent point
        if self.tangent is None and self.options["tangent"] is not "one_before_last":
            self._set_tangent(coeff)
-        if self.options["descriptor"]=="H_core":
-            self.H_cores.push(H_core)
-        else:
        # push the new data to the corresponding vectors
-            self.descriptors.push(self._compute_descriptor(coords))
+        self.descriptors.push(self._compute_descriptor(descriptor_input))

        if self.options["store_overlap"]:
            self.overlaps.push(overlap)
@@ -127,15 +120,12 @@ class Extrapolator:
        c_guess = self.guess_coefficients(coords, overlap)
        return c_guess @ c_guess.T

-    def guess_coefficients(self, H_core: np.ndarray, overlap=None) -> np.ndarray:
+    def guess_coefficients(self, descriptor_input: np.ndarray, overlap=None) -> np.ndarray:
        """Get a new coefficient matrix to be used as a guess."""

        # check if we have enough data points to perform an extrapolation
-       
-        if self.options["descriptor"]=="H_core":
-            count=self.H_cores.count
-        else:
        count = self.descriptors.count
+
        if self.options["allow_partially_filled"]:
            if count == 0:
                raise ValueError("Not enough data loaded in the extrapolator")
@@ -149,10 +139,6 @@ class Extrapolator:
            raise ValueError("Guessing without overlap requires `store_overlap` true.")

        # use the descriptors to find the fitting coefficients
-        if self.options["descriptor"]=="H_core":
-            prev_H_cores = self.H_cores.get(n)
-            fit_coefficients = self._fit(prev_H_cores, H_core)
-        else:
        prev_descriptors= self.descriptors.get(n)
        descriptor = self._compute_descriptor(coords)
        fit_coefficients = self._fit(prev_descriptors, descriptor)

--- a/tests/test_buffer.py
+++ b/tests/test_buffer.py
@@ -12,7 +12,7 @@ def test_buffer():
    buffer_size = 10
    nframes = 20

-    buffer = CircularBuffer(buffer_size, shape)
+    buffer = CircularBuffer(buffer_size)

    # partial load
    for i in range(buffer_size // 2):
@@ -59,7 +59,7 @@ def test_buffer():

 def test_buffer_manual():
    shape = (5, 5)
-    buffer = CircularBuffer(6, shape)
+    buffer = CircularBuffer(6)

    for i in range(6):
        matrix = np.full(shape, i)

--- a/tests/test_descriptor_fitting.py
+++ b/tests/test_descriptor_fitting.py
@@ -10,7 +10,7 @@ import gext.fitting
 import gext.grassmann
 import utils

-SMALL = 1e-8
+SMALL = 2e-8
 THRESHOLD = 5e-2

 @pytest.mark.parametrize("datafile", ["urea.json", "glucose.json"])
@@ -27,7 +27,7 @@ def test_least_square(datafile, regularization):
    # initialize an extrapolator
    extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms,
        nsteps=nframes, fitting_regularization=regularization,
-        fitting="leastsquare")
+        fitting="leastsquare", descriptor="distance")

    # load data in the extrapolator
    for (coords, coeff, overlap) in zip(data["trajectory"],
@@ -69,7 +69,8 @@ def test_quasi_time_reversible(datafile, regularization):

    # initialize an extrapolator
    extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms,
-        nsteps=nframes, fitting="qtr", fitting_regularization=regularization)
+        nsteps=nframes, fitting="qtr", fitting_regularization=regularization,
+        descriptor="distance")

    # load data in the extrapolator
    for (coords, coeff, overlap) in zip(data["trajectory"],
No results found