Skip to content
Snippets Groups Projects
Commit 7bfdb65d authored by Michele Nottoli's avatar Michele Nottoli
Browse files

Changed default for regularization, added a test.

parent 7c4d15ed
No related branches found
No related tags found
1 merge request!8Regularization
...@@ -64,7 +64,7 @@ class LeastSquare(AbstractFitting): ...@@ -64,7 +64,7 @@ class LeastSquare(AbstractFitting):
a = matrix.T @ matrix a = matrix.T @ matrix
b = matrix.T @ target b = matrix.T @ target
if self.options["regularization"] > 0.0: if self.options["regularization"] > 0.0:
a += np.identity(len(b))*self.options["regularization"] a += np.identity(len(b))*self.options["regularization"]**2
coefficients = np.linalg.solve(a, b) coefficients = np.linalg.solve(a, b)
return np.array(coefficients, dtype=np.float64) return np.array(coefficients, dtype=np.float64)
...@@ -103,7 +103,7 @@ class QuasiTimeReversible(AbstractFitting): ...@@ -103,7 +103,7 @@ class QuasiTimeReversible(AbstractFitting):
b = time_reversible_matrix.T @ (target + past_target) b = time_reversible_matrix.T @ (target + past_target)
if self.options["regularization"] > 0.0: if self.options["regularization"] > 0.0:
a += np.identity(len(b))*self.options["regularization"] a += np.identity(len(b))*self.options["regularization"]**2
coefficients = np.linalg.solve(a, b) coefficients = np.linalg.solve(a, b)
if q == 1: if q == 1:
......
...@@ -129,3 +129,71 @@ def test_time_reversibility(datafile): ...@@ -129,3 +129,71 @@ def test_time_reversibility(datafile):
# check the time reversibility # check the time reversibility
assert np.linalg.norm(fitted_target - fitted_target_reverse, ord=np.inf) < SMALL assert np.linalg.norm(fitted_target - fitted_target_reverse, ord=np.inf) < SMALL
@pytest.mark.parametrize("datafile", ["urea.json", "glucose.json"])
@pytest.mark.parametrize("regularization", [0.001, 0.005, 0.01, 0.05, 0.1])
def test_square_qtr_formulation(datafile, regularization):
# load test data from json file
data = utils.load_json(f"tests/{datafile}")
nelectrons = data["nelectrons"]
natoms = data["trajectory"].shape[1]
nbasis = data["overlaps"].shape[1]
nframes = data["trajectory"].shape[0]
# initialize an extrapolator
extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms,
nsteps=nframes, fitting="qtr", fitting_regularization=regularization)
# load data in the extrapolator
for (coords, coeff, overlap) in zip(data["trajectory"],
data["coefficients"], data["overlaps"]):
extrapolator.load_data(coords, coeff, overlap)
descriptors = extrapolator.descriptors.get(10)
target = descriptors[-1]
fitting_calculator = extrapolator.fitting_calculator
alt_fitting_calculator = utils.AlternativeQuasiTimeReversible()
alt_fitting_calculator.set_options(regularization=regularization)
# check if the two fittings give the same coefficients
for start in range(0, 8):
vectors = descriptors[start:-1]
fit_coefficients = fitting_calculator.fit(vectors, target)
alt_fit_coefficients = alt_fitting_calculator.fit(vectors, target)
assert np.linalg.norm(fit_coefficients - alt_fit_coefficients, ord=np.inf) < 1e-4
@pytest.mark.parametrize("datafile", ["urea.json", "glucose.json"])
@pytest.mark.parametrize("regularization", [0.001, 0.005, 0.01, 0.05, 0.1])
def test_square_ls_formulation(datafile, regularization):
# load test data from json file
data = utils.load_json(f"tests/{datafile}")
nelectrons = data["nelectrons"]
natoms = data["trajectory"].shape[1]
nbasis = data["overlaps"].shape[1]
nframes = data["trajectory"].shape[0]
# initialize an extrapolator
extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms,
nsteps=nframes, fitting="leastsquare", fitting_regularization=regularization)
# load data in the extrapolator
for (coords, coeff, overlap) in zip(data["trajectory"],
data["coefficients"], data["overlaps"]):
extrapolator.load_data(coords, coeff, overlap)
descriptors = extrapolator.descriptors.get(10)
target = descriptors[-1]
fitting_calculator = extrapolator.fitting_calculator
alt_fitting_calculator = utils.AlternativeLeastSquare()
alt_fitting_calculator.set_options(regularization=regularization)
# check if the two fittings give the same coefficients
for start in range(0, 8):
vectors = descriptors[start:-1]
fit_coefficients = fitting_calculator.fit(vectors, target)
alt_fit_coefficients = alt_fitting_calculator.fit(vectors, target)
assert np.linalg.norm(fit_coefficients - alt_fit_coefficients, ord=np.inf) < 1e-4
import os
import sys
import json import json
import numpy as np import numpy as np
from typing import List
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from gext.fitting import LeastSquare, QuasiTimeReversible
def load_json(path): def load_json(path):
with open(path, "r") as json_file: with open(path, "r") as json_file:
...@@ -12,3 +18,52 @@ def load_json(path): ...@@ -12,3 +18,52 @@ def load_json(path):
else: else:
data[key] = np.array(value) data[key] = np.array(value)
return data return data
class AlternativeLeastSquare(LeastSquare):
"""Alternative least square minimization fitting."""
def fit(self, vectors: List[np.ndarray], target: np.ndarray):
"""Given a set of vectors and a target return the fitting
coefficients."""
matrix = np.array(vectors).T
a = np.vstack((matrix, np.identity(len(vectors))*self.options["regularization"]))
b = np.concatenate((target, np.zeros(len(vectors))))
coefficients, _, _, _ = np.linalg.lstsq(a, b, rcond=-1)
return np.array(coefficients, dtype=np.float64)
class AlternativeQuasiTimeReversible(QuasiTimeReversible):
"""Quasi time reversible fitting scheme."""
def fit(self, vectors: List[np.ndarray], target: np.ndarray):
"""Given a set of vectors and a target return the fitting
coefficients in a quasi time reversible scheme."""
past_target = vectors[0]
matrix = np.array(vectors[1:]).T
q = matrix.shape[1]
if q == 1:
time_reversible_matrix = matrix
elif q%2 == 0:
time_reversible_matrix = matrix[:, :q//2] + matrix[:, :q//2-1:-1]
else:
time_reversible_matrix = matrix[:, :q//2+1] + matrix[:, :q//2-1:-1]
a = np.vstack((time_reversible_matrix,
np.identity(time_reversible_matrix.shape[1])*self.options["regularization"]))
b = np.concatenate((target + past_target, np.zeros((time_reversible_matrix.shape[1]))))
coefficients, _, _, _ = np.linalg.lstsq(a, b, rcond=-1)
if q == 1:
full_coefficients = np.concatenate(([-1.0], coefficients))
elif q%2 == 0:
full_coefficients = np.concatenate(([-1.0], coefficients,
coefficients[::-1]))
else:
full_coefficients = np.concatenate(([-1.0], coefficients[:-1],
2.0*coefficients[-1:], coefficients[-2::-1]))
return np.array(full_coefficients, dtype=np.float64)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment