Skip to content
Snippets Groups Projects
Commit 6d7c535a authored by Michele Nottoli's avatar Michele Nottoli
Browse files

Merge branch 'update' into 'main'

QTR

See merge request !6
parents 4e708ccc 9d524710
Branches
Tags
1 merge request!6QTR
Pipeline #1964 passed
......@@ -43,8 +43,16 @@ The behavior can be finely controlled by passing additional keyword arguments to
This is an up to date list of available keyword options:
- `nsteps`: integer parameter, number of steps to be used in the extrapolation.
**Note:** Calling `guess` before loading `nsteps` data points will cause a `ValueError`.
- `nsteps`: integer, default 6, number of steps to be used in the extrapolation.
- `verbose`: boolean, default False, if True print additional information.
- `descriptor`: string, default "distance", possible options are "distance" and "coulomb".
- `fitting`: string, default "leastsquare", possible options are "leastsquare" and "qtr".
- `allow_partially_filled`: bool, default True. If True allow to do a guess before `nsteps` data points have been loaded, if False asking for a guess before `nsteps` data points will cause a `ValueError`.
- `store_overlap`: bool, default True. Store the overlaps for later usage in calling guess without passing the current overlap. It can be disabled for performance, but calling guess will require passing the overlap.
Some options can be piped to the fitting modules.
- `fitting_regularization`: float, default 0.0. Controls the regularization for both the "leastsquare" and "qtr" fitting schemes.
## Acknowledgments
......
......@@ -7,6 +7,8 @@ class Distance:
"""Distance matrix descriptors."""
supported_options = {}
def __init__(self, **kwargs):
self.set_options(**kwargs)
......@@ -24,6 +26,8 @@ class Coulomb(Distance):
"""Coulomb matrix descriptors."""
supported_options = {}
def compute(self, coords: np.ndarray) -> np.ndarray:
"""Compute the Coulomb matrix as a descriptor."""
return 1.0/super().compute(coords)
......@@ -8,16 +8,29 @@ class AbstractFitting(abc.ABC):
"""Base class for fitting schemes."""
supported_options = {}
def __init__(self, **kwargs):
self.set_options(**kwargs)
@abc.abstractmethod
def set_options(self, **kwargs):
"""Base method for setting options."""
self.options = {}
for key, value in kwargs.items():
if key in self.supported_options:
self.options[key] = value
else:
raise ValueError(f"Unsupported option: {key}")
for option, default_value in self.supported_options.items():
if option not in self.options:
self.options[option] = default_value
@abc.abstractmethod
def compute(self, vectors: List[np.ndarray], target:np.ndarray):
def fit(self, vectors: List[np.ndarray], target:np.ndarray):
"""Base method for computing new fitting coefficients."""
return np.zeros(0)
def linear_combination(self, vectors: List[np.ndarray],
coefficients: np. ndarray) -> np.ndarray:
......@@ -38,34 +51,66 @@ class LeastSquare(AbstractFitting):
def set_options(self, **kwargs):
"""Set options for least square minimization"""
self.options = {}
for key, value in kwargs.items():
if key in self.supported_options:
self.options[key] = value
else:
raise ValueError(f"Unsupported option: {key}")
for option, default_value in self.supported_options.items():
if option not in self.options:
self.options[option] = default_value
super().set_options(**kwargs)
if self.options["regularization"] < 0 \
or self.options["regularization"] > 100:
raise ValueError("Unsupported value for regularization")
def compute(self, vectors: List[np.ndarray], target: np.ndarray):
def fit(self, vectors: List[np.ndarray], target: np.ndarray):
"""Given a set of vectors and a target return the fitting
coefficients."""
matrix = np.vstack(vectors).T
coefficients, _, _, _ = np.linalg.lstsq(matrix, target, rcond=None)
matrix = np.array(vectors).T
a = matrix.T @ matrix
b = matrix.T @ target
if self.options["regularization"] > 0.0:
a += np.identity(len(b))*self.options["regularization"]
coefficients = np.linalg.solve(a, b)
return np.array(coefficients, dtype=np.float64)
class QuasiTimeReversible(AbstractFitting):
"""Quasi time reversible fitting scheme. Not yet implemented."""
supported_options = {
"regularization": 0.0,
}
def set_options(self, **kwargs):
"""Set options for quasi time reversible fitting"""
super().set_options(**kwargs)
def compute(self, vectors: List[np.ndarray], target: np.ndarray):
if self.options["regularization"] < 0 \
or self.options["regularization"] > 100:
raise ValueError("Unsupported value for regularization")
def fit(self, vectors: List[np.ndarray], target: np.ndarray):
"""Time reversible least square minimization fitting."""
past_target = vectors[0]
matrix = np.array(vectors[1:]).T
q = matrix.shape[1]
if q == 1:
time_reversible_matrix = matrix
elif q%2 == 0:
time_reversible_matrix = matrix[:, :q//2] + matrix[:, :q//2-1:-1]
else:
time_reversible_matrix = matrix[:, :q//2+1] + matrix[:, :q//2-1:-1]
a = time_reversible_matrix.T @ time_reversible_matrix
b = time_reversible_matrix.T @ (target + past_target)
if self.options["regularization"] > 0.0:
a += np.identity(len(b))*self.options["regularization"]
coefficients = np.linalg.solve(a, b)
if q == 1:
full_coefficients = np.concatenate(([-1.0], coefficients))
elif q%2 == 0:
full_coefficients = np.concatenate(([-1.0], coefficients,
coefficients[::-1]))
else:
full_coefficients = np.concatenate(([-1.0], coefficients[:-1],
2.0*coefficients[-1:], coefficients[-2::-1]))
return np.array(full_coefficients, dtype=np.float64)
......@@ -12,19 +12,21 @@ class Extrapolator:
"""Class for performing Grassmann extrapolations. On initialization
it requires the number of electrons, the number of basis functions
and the number of atoms of the molecule. The number of previous
steps used by the extrapolator is an optional argument with default
value of 6."""
and the number of atoms of the molecule."""
supported_options = {
"verbose": False,
"nsteps": 6,
"descriptor": "distance",
"fitting": "leastsquare",
"allow_partially_filled": True,
"store_overlap": True,
}
def __init__(self, nelectrons: int, nbasis: int, natoms: int, **kwargs):
self.supported_options = {
"verbose": False,
"nsteps": 6,
"descriptor": "distance",
"fitting": "leastsquare",
"allow_partially_filled": True,
}
if not (type(nelectrons) == int and type(nbasis) == int and type(natoms) == int):
raise ValueError("Dimensions are not integers")
self.nelectrons = nelectrons
self.nbasis = nbasis
......@@ -32,9 +34,10 @@ class Extrapolator:
self.set_options(**kwargs)
self.gammas = CircularBuffer(self.options["nsteps"], (self.nelectrons//2, self.nbasis))
self.overlaps = CircularBuffer(self.options["nsteps"], (self.nbasis, self.nbasis))
self.descriptors = CircularBuffer(self.options["nsteps"],
((self.natoms - 1)*self.natoms//2, ))
if self.options["store_overlap"]:
self.overlaps = CircularBuffer(self.options["nsteps"], (self.nbasis, self.nbasis))
self.tangent: Optional[np.ndarray] = None
......@@ -47,6 +50,7 @@ class Extrapolator:
descriptor_options = {}
fitting_options = {}
# set specified options
for key, value in kwargs.items():
if key in self.supported_options:
self.options[key] = value
......@@ -57,11 +61,14 @@ class Extrapolator:
else:
raise ValueError(f"Unsupported option: {key}")
# set unspecified options with defaults
for option, default_value in self.supported_options.items():
if not option in self.options:
self.options[option] = default_value
if self.options["nsteps"] <= 1 or self.options["nsteps"] >= 100:
# do some check on the options, set things and pipe options
# to submodules
if self.options["nsteps"] < 1 or self.options["nsteps"] >= 100:
raise ValueError("Unsupported nsteps")
if self.options["descriptor"] == "distance":
......@@ -77,49 +84,68 @@ class Extrapolator:
elif self.options["fitting"] == "qtr":
self.fitting_calculator = QuasiTimeReversible()
else:
raise ValueError("Unsupported descriptor")
raise ValueError("Unsupported fitting")
self.fitting_calculator.set_options(**fitting_options)
def load_data(self, coords: np.ndarray, coeff: np.ndarray,
overlap: np.ndarray):
def load_data(self, coords: np.ndarray, coeff: np.ndarray, overlap):
"""Load a new data point in the extrapolator."""
# Crop the coefficient matrix up to the number of electron
# pairs, then apply S^1/2
coeff = self._crop_coeff(coeff)
coeff = self._normalize(coeff, overlap)
# if it is the first time we load data, set the tangent point
if self.tangent is None:
self._set_tangent(coeff)
# push the new data to the corresponding vectors
self.gammas.push(self._grassmann_log(coeff))
self.descriptors.push(self._compute_descriptor(coords))
self.overlaps.push(overlap)
def guess(self, coords: np.ndarray, overlap = None) -> np.ndarray:
if self.options["store_overlap"]:
self.overlaps.push(overlap)
def guess(self, coords: np.ndarray, overlap=None) -> np.ndarray:
"""Get a new electronic density matrix to be used as a guess."""
c_guess = self.guess_coefficients(coords, overlap)
return c_guess @ c_guess.T
def guess_coefficients(self, coords: np.ndarray, overlap = None) -> np.ndarray:
def guess_coefficients(self, coords: np.ndarray, overlap=None) -> np.ndarray:
"""Get a new coefficient matrix to be used as a guess."""
# check if we have enough data points to perform an extrapolation
count = self.descriptors.count
if self.options["allow_partially_filled"]:
n = min(self.options["nsteps"], self.descriptors.count)
if count == 0:
raise ValueError("Not enough data loaded in the extrapolator")
n = min(self.options["nsteps"], count)
else:
n = self.options["nsteps"]
if count < n:
raise ValueError("Not enough data loaded in the extrapolator")
if overlap is None and not self.options["store_overlap"]:
raise ValueError("Guessing without overlap requires `store_overlap` true.")
# use the descriptors to find the fitting coefficients
prev_descriptors = self.descriptors.get(n)
descriptor = self._compute_descriptor(coords)
fit_coefficients = self.fitting_calculator.compute(prev_descriptors, descriptor)
fit_coefficients = self._fit(prev_descriptors, descriptor)
# use the fitting coefficients and the previous gammas to
# extrapolate a new gamma
gammas = self.gammas.get(n)
gamma = self.fitting_calculator.linear_combination(gammas, fit_coefficients)
fit_descriptor = self.fitting_calculator.linear_combination(
prev_descriptors, fit_coefficients)
if self.options["verbose"]:
fit_descriptor = self.fitting_calculator.linear_combination(
prev_descriptors, fit_coefficients)
print("error on descriptor:", \
np.linalg.norm(fit_descriptor - descriptor, ord=np.inf))
# if the overlap is not given, use the coefficients to fit
# a new overlap
if overlap is None:
overlaps = self.overlaps.get(n)
overlap = self.fitting_calculator.linear_combination(overlaps, fit_coefficients)
......@@ -127,10 +153,9 @@ class Extrapolator:
else:
inverse_sqrt_overlap = self._inverse_sqrt_overlap(overlap)
# use the overlap and gamma to find a new set of coefficients
c_guess = self._grassmann_exp(gamma)
c_guess = inverse_sqrt_overlap @ c_guess
return c_guess
return inverse_sqrt_overlap @ c_guess
def _get_tangent(self) -> np.ndarray:
"""Get the tangent point."""
......@@ -176,3 +201,8 @@ class Extrapolator:
def _compute_descriptor(self, coords) -> np.ndarray:
"""Given a set of coordinates compute the corresponding descriptor."""
return self.descriptor_calculator.compute(coords)
def _fit(self, prev_descriptors, descriptor) -> np.ndarray:
"""Fit the current descriptor using previous descriptors and
the specified fitting scheme."""
return self.fitting_calculator.fit(prev_descriptors, descriptor)
......@@ -10,10 +10,12 @@ import gext.fitting
import gext.grassmann
import utils
SMALL = 1e-10
SMALL = 1e-8
THRESHOLD = 5e-2
@pytest.mark.parametrize("datafile", ["urea.json", "glucose.json"])
def test_descriptor_fitting(datafile):
@pytest.mark.parametrize("regularization", [0.0, 0.01, 0.05])
def test_least_square(datafile, regularization):
# load test data from json file
data = utils.load_json(f"tests/{datafile}")
......@@ -23,33 +25,107 @@ def test_descriptor_fitting(datafile):
nframes = data["trajectory"].shape[0]
# initialize an extrapolator
extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms, nsteps=nframes)
extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms,
nsteps=nframes, fitting_regularization=regularization,
fitting="leastsquare")
# load data in the extrapolator
for (coords, coeff, overlap) in zip(data["trajectory"],
data["coefficients"], data["overlaps"]):
extrapolator.load_data(coords, coeff, overlap)
# we check if the error goes down with a larger data set
errors = []
descriptors = extrapolator.descriptors.get(10)
target = descriptors[-1]
fitting_calculator = gext.fitting.LeastSquare()
fitting_calculator = extrapolator.fitting_calculator
# check if things are reasonable
for start in range(0, 9):
vectors = descriptors[start:-1]
fit_coefficients = fitting_calculator.compute(vectors, target)
fit_coefficients = fitting_calculator.fit(vectors, target)
fitted_target = fitting_calculator.linear_combination(vectors, fit_coefficients)
errors.append(np.linalg.norm(target - fitted_target, ord=np.inf))
error = np.linalg.norm(target - fitted_target, ord=np.inf)
assert error < THRESHOLD
assert errors[0] < errors[-1]
# we check that we get a zero error if we put the target in the vectors
# used for the fitting
# if we put the target in the vectors used for the fitting,
# check that we get an error smaller than the regularization
vectors = descriptors[:-1]
vectors[0] = target
fit_coefficients = fitting_calculator.compute(vectors, target)
fit_coefficients = fitting_calculator.fit(vectors, target)
fitted_target = fitting_calculator.linear_combination(vectors, fit_coefficients)
assert np.linalg.norm(target - fitted_target, ord=np.inf) < SMALL
assert np.linalg.norm(target - fitted_target, ord=np.inf) < max(SMALL, regularization)
@pytest.mark.parametrize("datafile", ["urea.json", "glucose.json"])
@pytest.mark.parametrize("regularization", [0.0, 0.01, 0.05])
def test_quasi_time_reversible(datafile, regularization):
# load test data from json file
data = utils.load_json(f"tests/{datafile}")
nelectrons = data["nelectrons"]
natoms = data["trajectory"].shape[1]
nbasis = data["overlaps"].shape[1]
nframes = data["trajectory"].shape[0]
# initialize an extrapolator
extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms,
nsteps=nframes, fitting="qtr", fitting_regularization=regularization)
# load data in the extrapolator
for (coords, coeff, overlap) in zip(data["trajectory"],
data["coefficients"], data["overlaps"]):
extrapolator.load_data(coords, coeff, overlap)
descriptors = extrapolator.descriptors.get(10)
target = descriptors[-1]
fitting_calculator = extrapolator.fitting_calculator
# check if things are reasonable
for start in range(0, 8):
vectors = descriptors[start:-1]
fit_coefficients = fitting_calculator.fit(vectors, target)
fitted_target = fitting_calculator.linear_combination(vectors, fit_coefficients)
error = np.linalg.norm(target - fitted_target, ord=np.inf)
assert error < THRESHOLD
@pytest.mark.parametrize("datafile", ["urea.json", "glucose.json"])
def test_time_reversibility(datafile):
# load test data from json file
data = utils.load_json(f"tests/{datafile}")
nelectrons = data["nelectrons"]
natoms = data["trajectory"].shape[1]
nbasis = data["overlaps"].shape[1]
nframes = data["trajectory"].shape[0]
# initialize an extrapolator
extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms,
nsteps=nframes, fitting="qtr")
# load data in the extrapolator
for (coords, coeff, overlap) in zip(data["trajectory"],
data["coefficients"], data["overlaps"]):
extrapolator.load_data(coords, coeff, overlap)
descriptors = extrapolator.descriptors.get(10)
# we symmetrize the future and past targets (remember that it is
# quasi time reversible, not exactly time reversible)
target = descriptors[0] + descriptors[-1]
descriptors[0] = target
descriptors[-1] = target
fitting_calculator = extrapolator.fitting_calculator
# fit the future target
fit_coefficients = fitting_calculator.fit(descriptors[:-1], descriptors[-1])
fitted_target = fitting_calculator.linear_combination(descriptors[:-1], fit_coefficients)
# fit the past target
reversed_descriptors = list(reversed(descriptors))
fit_coefficients = fitting_calculator.fit(reversed_descriptors[:-1], reversed_descriptors[-1])
fitted_target_reverse = fitting_calculator.linear_combination(reversed_descriptors[:-1], fit_coefficients)
# check the time reversibility
assert np.linalg.norm(fitted_target - fitted_target_reverse, ord=np.inf) < SMALL
......@@ -9,10 +9,13 @@ import gext.grassmann
import utils
SMALL = 1e-10
THRESHOLD = 1e-2
THRESHOLD = 5e-2
@pytest.mark.parametrize("datafile", ["urea.json", "glucose.json"])
def test_extrapolation(datafile):
@pytest.mark.parametrize("fitting", ["leastsquare", "qtr"])
@pytest.mark.parametrize("regularization", [0.0, 1e-6, 5e-6])
@pytest.mark.parametrize("descriptor", ["distance", "coulomb"])
def test_extrapolation(datafile, fitting, regularization, descriptor):
# load test data from json file
data = utils.load_json(f"tests/{datafile}")
......@@ -26,7 +29,9 @@ def test_extrapolation(datafile):
assert n < nframes
# initialize an extrapolator
extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms, nsteps=n)
extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms,
nsteps=n, fitting=fitting, fitting_regularization=regularization,
descriptor=descriptor)
# load data in the extrapolator up to index n - 1
for (coords, coeff, overlap) in zip(data["trajectory"][:n],
......@@ -106,3 +111,37 @@ def test_coefficient_extrapolation(datafile):
assert np.linalg.norm(guessed_density - density, ord=np.inf) < THRESHOLD
assert np.linalg.norm(guessed_density - density, ord=np.inf) \
/np.linalg.norm(density, ord=np.inf) < THRESHOLD
@pytest.mark.parametrize("datafile", ["urea.json", "glucose.json"])
def test_errors(datafile):
# load test data from json file
data = utils.load_json(f"tests/{datafile}")
nelectrons = data["nelectrons"]
natoms = data["trajectory"].shape[1]
nbasis = data["overlaps"].shape[1]
nframes = data["trajectory"].shape[0]
# amount of data we want to use for fitting
n = 9
assert n < nframes
# initialize an extrapolator
extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms, nsteps=n)
with pytest.raises(ValueError):
extrapolator.guess(data["trajectory"][0])
# initialize a new extrapolator
extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms, nsteps=n,
allow_partially_filled=False)
# load data in the extrapolator up to index m - 1
m = 4
for (coords, coeff, overlap) in zip(data["trajectory"][:m],
data["coefficients"][:m], data["overlaps"][:m]):
extrapolator.load_data(coords, coeff, overlap)
# check an extrapolation at index m
with pytest.raises(ValueError):
extrapolator.guess(data["trajectory"][m])
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment