Skip to content
Snippets Groups Projects
Commit b5cbeecb authored by Michele Nottoli's avatar Michele Nottoli
Browse files

Merge branch 'options' into 'main'

Options

See merge request !4
parents 55c3f32b 0a72fd08
No related branches found
No related tags found
1 merge request!4Options
Pipeline #1952 passed
"""Module which provides functionality to compute descriptors."""
import numpy as np
from scipy.spatial.distance import pdist
def distance(coords: np.ndarray) -> np.ndarray:
    """Compute the (condensed) pairwise distance matrix as a descriptor."""
    pairwise = pdist(coords, metric="euclidean")
    return pairwise
class Distance:
    """Distance matrix descriptors.

    Computes the condensed pairwise Euclidean distance matrix of a set
    of atomic coordinates. Fix: this span contained pre-merge residue
    (a removed module-level ``coulomb`` helper) interleaved by the diff;
    it is superseded by the ``Coulomb`` class below. Also fixes the
    docstring typo "matric" -> "matrix".
    """

    def __init__(self, **kwargs):
        # Delegate validation of keyword options to set_options.
        self.set_options(**kwargs)

    def set_options(self, **kwargs):
        """Given an option dictionary set the valid options and
        raise an error if there are invalid ones."""
        # This descriptor accepts no options: anything passed is invalid.
        if len(kwargs) > 0:
            raise ValueError("Invalid arguments given to the descriptor class.")

    def compute(self, coords: np.ndarray) -> np.ndarray:
        """Compute the distance matrix as a descriptor."""
        return pdist(coords, metric="euclidean")
class Coulomb(Distance):
    """Coulomb matrix descriptors (elementwise inverse of the distances)."""

    def compute(self, coords: np.ndarray) -> np.ndarray:
        """Compute the Coulomb matrix as a descriptor."""
        distances = super().compute(coords)
        return 1.0 / distances
"""Module which provides functionality to perform fitting."""
import abc
from typing import List
import numpy as np
def linear(vectors: List[np.ndarray], target: np.ndarray):
class AbstractFitting(abc.ABC):
    """Base class for fitting schemes."""

    def __init__(self, **kwargs):
        # Options are parsed by the concrete subclass implementation.
        self.set_options(**kwargs)

    @abc.abstractmethod
    def set_options(self, **kwargs):
        """Base method for setting options."""

    @abc.abstractmethod
    def compute(self, vectors: List[np.ndarray], target: np.ndarray):
        """Base method for computing new fitting coefficients."""

    def linear_combination(self, vectors: List[np.ndarray],
            coefficients: np.ndarray) -> np.ndarray:
        """Build the linear combination of *vectors* (or matrices)
        weighted by the matching *coefficients*."""
        combination = np.zeros(vectors[0].shape, dtype=np.float64)
        for weight, vector in zip(coefficients, vectors):
            combination = combination + vector * weight
        return combination
class LeastSquare(AbstractFitting):
    """Simple least square minimization fitting.

    Fix: this span contained pre-merge diff residue (the removed
    module-level ``linear``, ``quasi_time_reversible`` and
    ``linear_combination`` functions) interleaved inside the class;
    the class methods below supersede them.
    """

    # Options accepted by set_options together with their default values.
    supported_options = {
        "regularization": 0.0,
    }

    def set_options(self, **kwargs):
        """Set options for least square minimization.

        Unknown keys raise ValueError; missing keys fall back to the
        defaults declared in supported_options."""
        self.options = {}
        for key, value in kwargs.items():
            if key in self.supported_options:
                self.options[key] = value
            else:
                raise ValueError(f"Unsupported option: {key}")
        for option, default_value in self.supported_options.items():
            if option not in self.options:
                self.options[option] = default_value
        # Accepted regularization range is [0, 100].
        if self.options["regularization"] < 0 \
                or self.options["regularization"] > 100:
            raise ValueError("Unsupported value for regularization")

    def compute(self, vectors: List[np.ndarray], target: np.ndarray):
        """Given a set of vectors and a target return the fitting
        coefficients."""
        # NOTE(review): the "regularization" option is validated in
        # set_options but never used here — confirm whether it should
        # enter the least-squares problem.
        matrix = np.vstack(vectors).T
        coefficients, _, _, _ = np.linalg.lstsq(matrix, target, rcond=None)
        return np.array(coefficients, dtype=np.float64)
class QuasiTimeReversible(AbstractFitting):
    """Quasi time reversible fitting scheme. Not yet implemented."""
    def set_options(self, **kwargs):
        """Set options for quasi time reversible fitting"""
        # Placeholder: accepts and ignores all options (no body below the
        # docstring in this view).
    def compute(self, vectors: List[np.ndarray], target: np.ndarray):
        """Time reversible least square minimization fitting."""
        # NOTE(review): the body appears truncated at a diff-context
        # boundary here — confirm the implementation against the full file.
......@@ -4,8 +4,8 @@ from typing import Optional
import numpy as np
from . import grassmann
from . import fitting
from . import descriptors
from .fitting import LeastSquare, QuasiTimeReversible
from .descriptors import Distance, Coulomb
from .buffer import CircularBuffer
class Extrapolator:
......@@ -14,24 +14,71 @@ class Extrapolator:
it requires the number of electrons, the number of basis functions
and the number of atoms of the molecule. The number of previous
steps used by the extrapolator is an optional argument with default
value of 6."""
def __init__(self, nelectrons: int, nbasis: int, natoms: int, **kwargs):
    """Initialize the extrapolator.

    nelectrons: number of electrons (gammas are sized nelectrons//2,
        i.e. a closed-shell layout is assumed — confirm against callers).
    nbasis: number of basis functions.
    natoms: number of atoms of the molecule.
    kwargs: options parsed by set_options (nsteps, verbose, descriptor,
        fitting, allow_partially_filled, plus descriptor_*/fitting_*
        prefixed options).

    Fix: this span contained pre-merge diff residue (old signature with
    an explicit ``nsteps`` argument, old buffer construction using
    ``self.nsteps`` and an old ``self._set_options`` call) duplicated
    next to the merged code; only the merged initialization is kept.
    """
    # Recognized options and their defaults; consumed by set_options.
    self.supported_options = {
        "verbose": False,
        "nsteps": 6,
        "descriptor": "distance",
        "fitting": "leastsquare",
        "allow_partially_filled": True,
    }

    self.nelectrons = nelectrons
    self.nbasis = nbasis
    self.natoms = natoms
    # Parse options first: buffer sizes depend on options["nsteps"].
    self.set_options(**kwargs)

    self.gammas = CircularBuffer(self.options["nsteps"],
        (self.nelectrons//2, self.nbasis))
    self.overlaps = CircularBuffer(self.options["nsteps"],
        (self.nbasis, self.nbasis))
    # Condensed pairwise-distance descriptor length: natoms*(natoms-1)/2.
    self.descriptors = CircularBuffer(self.options["nsteps"],
        ((self.natoms - 1)*self.natoms//2, ))

    # Grassmann tangent point; set lazily elsewhere.
    self.tangent: Optional[np.ndarray] = None
def set_options(self, **kwargs):
    """Given an arbitrary amount of keyword arguments, parse them if
    specified, set default values if not specified and raise an error
    if invalid arguments are passed.

    Options prefixed with "descriptor_" or "fitting_" are stripped of
    the prefix and forwarded to the descriptor/fitting calculators.

    Raises ValueError on unknown options, on an out-of-range nsteps,
    or on an unknown descriptor/fitting scheme.

    Fix: the fitting-scheme else-branch raised
    ValueError("Unsupported descriptor") — a copy-paste error; it now
    reports "Unsupported fitting".
    """
    self.options = {}
    descriptor_options = {}
    fitting_options = {}

    # Route each keyword to the extrapolator itself or, by prefix, to
    # one of the sub-calculators.
    for key, value in kwargs.items():
        if key in self.supported_options:
            self.options[key] = value
        elif key.startswith("descriptor_"):
            descriptor_options[key[11:]] = value  # strip "descriptor_"
        elif key.startswith("fitting_"):
            fitting_options[key[8:]] = value  # strip "fitting_"
        else:
            raise ValueError(f"Unsupported option: {key}")

    # Fill in defaults for anything not explicitly given.
    for option, default_value in self.supported_options.items():
        if option not in self.options:
            self.options[option] = default_value

    # Sanity window for the history length (must be at least 2, below 100).
    if self.options["nsteps"] <= 1 or self.options["nsteps"] >= 100:
        raise ValueError("Unsupported nsteps")

    if self.options["descriptor"] == "distance":
        self.descriptor_calculator = Distance()
    elif self.options["descriptor"] == "coulomb":
        self.descriptor_calculator = Coulomb()
    else:
        raise ValueError("Unsupported descriptor")
    self.descriptor_calculator.set_options(**descriptor_options)

    if self.options["fitting"] == "leastsquare":
        self.fitting_calculator = LeastSquare()
    elif self.options["fitting"] == "qtr":
        self.fitting_calculator = QuasiTimeReversible()
    else:
        raise ValueError("Unsupported fitting")
    self.fitting_calculator.set_options(**fitting_options)
def load_data(self, coords: np.ndarray, coeff: np.ndarray,
overlap: np.ndarray):
......@@ -49,21 +96,28 @@ class Extrapolator:
def guess(self, coords: np.ndarray, overlap = None) -> np.ndarray:
"""Get a new electronic density to be used as a guess."""
prev_descriptors = self.descriptors.get(self.nsteps)
if self.options["allow_partially_filled"]:
n = min(self.options["nsteps"], self.descriptors.count)
else:
n = self.options["nsteps"]
prev_descriptors = self.descriptors.get(n)
descriptor = self._compute_descriptor(coords)
fit_coefficients = fitting.linear(prev_descriptors, descriptor)
fit_coefficients = self.fitting_calculator.compute(prev_descriptors, descriptor)
gammas = self.gammas.get(self.nsteps)
gamma = fitting.linear_combination(gammas, fit_coefficients)
gammas = self.gammas.get(n)
gamma = self.fitting_calculator.linear_combination(gammas, fit_coefficients)
fit_descriptor = fitting.linear_combination(prev_descriptors, fit_coefficients)
fit_descriptor = self.fitting_calculator.linear_combination(
prev_descriptors, fit_coefficients)
if self.options["verbose"]:
print("error on descriptor:", np.linalg.norm(fit_descriptor - descriptor, ord=np.inf))
print("error on descriptor:", \
np.linalg.norm(fit_descriptor - descriptor, ord=np.inf))
if overlap is None:
overlaps = self.overlaps.get(self.nsteps)
overlap = fitting.linear_combination(overlaps, fit_coefficients)
overlaps = self.overlaps.get(n)
overlap = self.fitting_calculator.linear_combination(overlaps, fit_coefficients)
inverse_sqrt_overlap = self._inverse_sqrt_overlap(overlap)
else:
inverse_sqrt_overlap = self._inverse_sqrt_overlap(overlap)
......@@ -73,14 +127,6 @@ class Extrapolator:
return c_guess @ c_guess.T
def _set_options(self, **kwargs):
    """Parse additional options from the additional keyword arguments."""
    # Only "verbose" is recognized; it defaults to False when absent.
    self.options = {"verbose": kwargs.get("verbose", False)}
def _get_tangent(self) -> np.ndarray:
"""Get the tangent point."""
if self.tangent is not None:
......@@ -124,4 +170,4 @@ class Extrapolator:
def _compute_descriptor(self, coords) -> np.ndarray:
    """Given a set of coordinates compute the corresponding descriptor.

    Fix: this span held both the pre-merge return
    (``descriptors.distance(coords)``) and the merged one; only the
    merged delegation to the configured calculator is kept.
    """
    return self.descriptor_calculator.compute(coords)
......@@ -23,7 +23,7 @@ def test_descriptor_fitting(datafile):
nframes = data["trajectory"].shape[0]
# initialize an extrapolator
extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms, nframes)
extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms, nsteps=nframes)
# load data in the extrapolator
for (coords, coeff, overlap) in zip(data["trajectory"],
......@@ -35,10 +35,12 @@ def test_descriptor_fitting(datafile):
descriptors = extrapolator.descriptors.get(10)
target = descriptors[-1]
fitting_calculator = gext.fitting.LeastSquare()
for start in range(0, 9):
vectors = descriptors[start:-1]
fit_coefficients = gext.fitting.linear(vectors, target)
fitted_target = gext.fitting.linear_combination(vectors, fit_coefficients)
fit_coefficients = fitting_calculator.compute(vectors, target)
fitted_target = fitting_calculator.linear_combination(vectors, fit_coefficients)
errors.append(np.linalg.norm(target - fitted_target, ord=np.inf))
assert errors[0] < errors[-1]
......@@ -47,7 +49,7 @@ def test_descriptor_fitting(datafile):
# used for the fitting
vectors = descriptors[:-1]
vectors[0] = target
fit_coefficients = gext.fitting.linear(vectors, target)
fitted_target = gext.fitting.linear_combination(vectors, fit_coefficients)
fit_coefficients = fitting_calculator.compute(vectors, target)
fitted_target = fitting_calculator.linear_combination(vectors, fit_coefficients)
assert np.linalg.norm(target - fitted_target, ord=np.inf) < SMALL
......@@ -26,7 +26,7 @@ def test_extrapolation(datafile):
assert n < nframes
# initialize an extrapolator
extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms, n)
extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms, nsteps=n)
# load data in the extrapolator up to index n - 1
for (coords, coeff, overlap) in zip(data["trajectory"][:n],
......@@ -41,3 +41,35 @@ def test_extrapolation(datafile):
assert np.linalg.norm(guessed_density - density, ord=np.inf) < THRESHOLD
assert np.linalg.norm(guessed_density - density, ord=np.inf) \
/np.linalg.norm(density, ord=np.inf) < THRESHOLD
@pytest.mark.parametrize("datafile", ["urea.json", "glucose.json"])
def test_partial_extrapolation(datafile):
    # load test data from json file
    data = utils.load_json(f"tests/{datafile}")
    nelectrons = data["nelectrons"]
    natoms = data["trajectory"].shape[1]
    nbasis = data["overlaps"].shape[1]
    nframes = data["trajectory"].shape[0]

    # amount of data we want to use for fitting: a buffer of n steps,
    # only m of which are actually loaded (partial fill)
    n = 9
    m = 5
    assert n < nframes

    # initialize an extrapolator
    extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms, nsteps=n)

    # load data in the extrapolator up to index n - 1
    frames = zip(data["trajectory"][:m],
        data["coefficients"][:m], data["overlaps"][:m])
    for (coords, coeff, overlap) in frames:
        extrapolator.load_data(coords, coeff, overlap)

    # check an extrapolation at index n
    guessed_density = extrapolator.guess(data["trajectory"][m], data["overlaps"][m])

    # build the reference density from the occupied coefficients
    occupied = data["coefficients"][m][:, :nelectrons//2]
    density = occupied @ occupied.T

    abs_error = np.linalg.norm(guessed_density - density, ord=np.inf)
    assert abs_error < THRESHOLD
    assert abs_error/np.linalg.norm(density, ord=np.inf) < THRESHOLD
......@@ -21,7 +21,7 @@ def test_grassmann(datafile):
nframes = data["trajectory"].shape[0]
# initialize an extrapolator
extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms, nframes)
extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms, nsteps=nframes)
# load data in the extrapolator
for (coords, coeff, overlap) in zip(data["trajectory"],
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment