diff --git a/gext/fitting.py b/gext/fitting.py index 0e50e2f133c801f029724f62dc2d57276ab8d87e..41fc250b5fa617723da8b9cf25259bd948b20635 100644 --- a/gext/fitting.py +++ b/gext/fitting.py @@ -62,14 +62,15 @@ class DiffFitting(AbstractFitting): """Given a set of vectors and a target return the fitting coefficients.""" if len(vectors) == 1: - raise ValueError("DiffFit does not work for one vector") + raise ValueError("DiffFitting does not work for one vector") target=target-vectors[-1] - target=target.flatten() - VECTORS=[] + + diff_vectors = [] for i in range(1, len(vectors)): each_vector=vectors[i-1]-vectors[-1] - VECTORS.append(each_vector.flatten()) - matrix = np.array(VECTORS).T + diff_vectors.append(each_vector) + + matrix = np.array(diff_vectors).T a = matrix.T @ matrix b = matrix.T @ target if self.options["regularization"] > 0.0: @@ -82,12 +83,12 @@ class DiffFitting(AbstractFitting): """Given a set of vectors (or matrices) and the corresponding coefficients, build their linear combination.""" if len(vectors) == 1: - raise ValueError("DiffFit does not work for one vector") + raise ValueError("DiffFitting does not work for one vector") result = np.zeros(vectors[0].shape, dtype=np.float64) - VECTORS_DiffFitting=[] + diff_vectors = [] for i in range(1,len(vectors)): - VECTORS_DiffFitting.append(vectors[i-1]-vectors[-1]) - for coeff, vector in zip(coefficients, VECTORS_DiffFitting): + diff_vectors.append(vectors[i-1]-vectors[-1]) + for coeff, vector in zip(coefficients, diff_vectors): result += vector*coeff result=result+vectors[-1] return result diff --git a/tests/test_descriptor_fitting.py b/tests/test_descriptor_fitting.py index d8afedef53bac16bb958eae970e458f58e23a748..eae015a4855d72c532b84646b472ec1dc6dfce2f 100644 --- a/tests/test_descriptor_fitting.py +++ b/tests/test_descriptor_fitting.py @@ -130,3 +130,46 @@ def test_time_reversibility(datafile): # check the time reversibility assert np.linalg.norm(fitted_target - fitted_target_reverse, ord=np.inf) < SMALL + +@pytest.mark.parametrize("datafile", ["urea.json", "glucose.json"]) +@pytest.mark.parametrize("regularization", [0.0, 0.01, 0.05]) +def test_diff_fitting(datafile, regularization): + + # load test data from json file + data = utils.load_json(f"tests/{datafile}") + nelectrons = data["nelectrons"] + natoms = data["trajectory"].shape[1] + nbasis = data["overlaps"].shape[1] + nframes = data["trajectory"].shape[0] + + # initialize an extrapolator + extrapolator = gext.Extrapolator(nelectrons, nbasis, natoms, + nsteps=nframes, fitting="diff", fitting_regularization=regularization, + descriptor="distance") + + # load data in the extrapolator + for (coords, coeff, overlap) in zip(data["trajectory"], + data["coefficients"], data["overlaps"]): + extrapolator.load_data(coords, coeff, overlap) + + descriptors = extrapolator.descriptors.get(10) + target = descriptors[-1] + + fitting_calculator = extrapolator.fitting_calculator + + # check if things are reasonable + for start in range(0, 8): + vectors = descriptors[start:-1] + fit_coefficients = fitting_calculator.fit(vectors, target) + fitted_target = fitting_calculator.linear_combination(vectors, fit_coefficients) + error = np.linalg.norm(target - fitted_target, ord=np.inf) + assert error < THRESHOLD + + # if we put the target in the vectors used for the fitting, + # check that we get an error smaller than the regularization + vectors = descriptors[:-1] + vectors[0] = target + fit_coefficients = fitting_calculator.fit(vectors, target) + fitted_target = fitting_calculator.linear_combination(vectors, fit_coefficients) + + assert np.linalg.norm(target - fitted_target, ord=np.inf) < max(SMALL, regularization)