From 61d77698d0a94d81140f1055ddbee043d34be3b1 Mon Sep 17 00:00:00 2001 From: Tizian Wenzel <wenzeltn@nbanm02.mathematik.uni-stuttgart.de> Date: Thu, 8 Jun 2023 09:18:03 +0200 Subject: [PATCH] Initial commit of all required files. --- section_4.1_compute_visualize.py | 105 +++++++++ section_4.2_compute.py | 75 +++++++ section_4.2_visualize.py | 115 ++++++++++ section_4.3_compute.py | 109 ++++++++++ section_4.3_visualize.py | 247 +++++++++++++++++++++ utils/cv_rippa_ext.py | 83 ++++++++ utils/dataset_collection.py | 291 +++++++++++++++++++++++++ utils/hyperparameters.py | 163 ++++++++++++++ utils/kernels.py | 283 ++++++++++++++++++++++++ utils/main_function.py | 285 +++++++++++++++++++++++++ utils/optimized_kernel.py | 117 ++++++++++ utils/tkernels.py | 195 +++++++++++++++++ utils/vkoga.py | 354 +++++++++++++++++++++++++++++++ 13 files changed, 2422 insertions(+) create mode 100644 section_4.1_compute_visualize.py create mode 100644 section_4.2_compute.py create mode 100644 section_4.2_visualize.py create mode 100644 section_4.3_compute.py create mode 100644 section_4.3_visualize.py create mode 100644 utils/cv_rippa_ext.py create mode 100644 utils/dataset_collection.py create mode 100644 utils/hyperparameters.py create mode 100644 utils/kernels.py create mode 100644 utils/main_function.py create mode 100644 utils/optimized_kernel.py create mode 100644 utils/tkernels.py create mode 100644 utils/vkoga.py diff --git a/section_4.1_compute_visualize.py b/section_4.1_compute_visualize.py new file mode 100644 index 0000000..3f6dcfd --- /dev/null +++ b/section_4.1_compute_visualize.py @@ -0,0 +1,105 @@ +# DATA-DRIVEN KERNEL DESIGNS FOR OPTIMIZED GREEDY SCHEMES: A MACHINE LEARNING PERSPECTIVE by +# TIZIAN WENZEL, FRANCESCO MARCHETTI, EMMA PERRACCHIONE +# Code related to the numerical experiment within Section 4.1. "Function approximation +# on the unit cube" to produce the plots within Figure 2. 
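# The pipeline called below optimizes a "two-layered" kernel: a base RBF kernel
# evaluated on linearly transformed inputs, i.e. K = k(X @ A, Z @ A) with a
# trainable matrix A (cf. utils/optimized_kernel.py and utils/main_function.py,
# where VKOGA is afterwards run on X_train @ A).  The helper below is only a
# minimal sketch of this construction and is not used by the script; the
# Gaussian base kernel and the diagonal choice of A are assumptions made purely
# for illustration.
def _sketch_two_layered_kernel():
    import numpy as np
    from scipy.spatial import distance_matrix

    A = np.diag([1.0, 0.5, 0.0])                      # "first layer": linear map
    base_kernel = lambda X, Z: np.exp(-distance_matrix(X, Z) ** 2)

    X = np.random.rand(5, 3)
    Z = np.random.rand(4, 3)
    return base_kernel(X @ A, Z @ A)                  # 5 x 4 two-layered kernel matrix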
+ + +import torch +from P36_Francesco_Emma.utilities.main_function import run_everything, run_cross_validation +from P36_Francesco_Emma.utilities.hyperparameters import dic_hyperparams + +import numpy as np +from matplotlib import pyplot as plt +from scipy import io +import os + + +## Some settings +# name_dataset = 'example_5d_faster_conv' +name_dataset = 'example_6d_kink' +# name_dataset = 'example_7d_semiactive' + + +## Load the hyperparameters +hyperparameter = dic_hyperparams[name_dataset] + + +## Run everything +A_start, A_optimized, model, model_vkoga1, model_vkoga2, data, \ + array_concatenate, _, dic_timings_2L = run_everything( + name_dataset, + hyperparameter.maxIter_vkoga, hyperparameter.N_points, + hyperparameter.noise_level, hyperparameter.reg_para_optim, hyperparameter.reg_para_vkoga, + hyperparameter.learning_rate, hyperparameter.n_epochs, + hyperparameter.batch_size, hyperparameter.n_folds, + flag_initialize_diagonal=hyperparameter.flag_initialize_diagonal, + flag_symmetric_A=hyperparameter.flag_symmetric, + flag_gaussian=hyperparameter.flag_gaussian) + + +## Run cross-validation +array_eps, array_cv_f, array_cv_f_val, _, list_timings_1L = run_cross_validation( + name_dataset, # required only for hyperparameters + data, hyperparameter.maxIter_vkoga, + flag_gaussian=hyperparameter.flag_gaussian, n_cross_val=hyperparameter.n_cross_val) + + + +## Store in matlab for beautiful tikzfigure plots +path_for_results = os.getcwd() + '/P36_Francesco_Emma/paper_experiments/results/' +os.makedirs(path_for_results, exist_ok=True) +io.savemat(path_for_results + name_dataset + '.mat', + dict(array_concatenate=array_concatenate, + list_objectives=model.list_obj, + train_hist_deep_f=model_vkoga1.train_hist['f'], + train_hist_deep_f_val=model_vkoga1.train_hist['f val'], + train_hist_std_f=model_vkoga2.train_hist['f'], + train_hist_std_f_val=model_vkoga2.train_hist['f val'], + array_eps=array_eps, + array_cv_f=array_cv_f, + array_cv_f_val=array_cv_f_val, + dic_timings_2L = dic_timings_2L, + list_timings_1L = list_timings_1L)) + +# in Matlab: +# results = load('gong.mat') + + +## Some plots +R = np.linspace(0, 1, int(1.5*array_cv_f.shape[1])) +array_color=plt.cm.hsv(R) + +# Visualization of the decay of the L-infty error +plt.figure(1) +plt.clf() +plt.plot(model_vkoga1.train_hist['f'], 'k') +# plt.plot(model_vkoga1.train_hist['f val'], 'k--') +for idx_para, para_ep in enumerate(array_eps): + plt.plot(array_cv_f[:, idx_para], color=array_color[idx_para, :]) + # plt.plot(array_cv_f[:, idx_para], '--', color=array_color[idx_para, :]) +plt.title('training error') +plt.xscale('log') +plt.yscale('log') +plt.legend(['2layered'] + [para_ep for para_ep in np.round(array_eps, 3)]) +plt.draw() + + +# Visualization of the evolution of some matrix entries +n_epochs = array_concatenate.shape[-1] + +plt.figure(2) +plt.clf() +list_legend = [] +for k in range(array_concatenate.shape[0]): + plt.plot(np.arange(1, n_epochs + 1), array_concatenate[k, k, :]) + list_legend.append('A[{},{}]'.format(k, k)) +plt.plot(np.arange(1, n_epochs + 1), array_concatenate[0, 1, :], '--') +plt.plot(np.arange(1, n_epochs + 1), array_concatenate[1, 0, :], '--') +plt.legend(list_legend + ['A[0,1]', 'A[1,0]']) +plt.title('matrix entries') +plt.xscale('log') +plt.xlabel('iterations') +plt.draw() + + + diff --git a/section_4.2_compute.py b/section_4.2_compute.py new file mode 100644 index 0000000..150836e --- /dev/null +++ b/section_4.2_compute.py @@ -0,0 +1,75 @@ +# DATA-DRIVEN KERNEL DESIGNS FOR OPTIMIZED GREEDY SCHEMES: A MACHINE 
LEARNING PERSPECTIVE by +# TIZIAN WENZEL, FRANCESCO MARCHETTI, EMMA PERRACCHIONE +# Code related to the numerical experiment within Section 4.2. "Machine learning data +# sets", especially to produce the results for the Figures 3-5. + + +import torch +from P36_Francesco_Emma.utilities.main_function import run_everything, run_cross_validation +from P36_Francesco_Emma.utilities.hyperparameters import dic_hyperparams + +import numpy as np +from matplotlib import pyplot as plt +from scipy import io +import os + + +list_datasets = ['fried', 'sarcos', 'protein', 'ct', 'diamonds', + 'stock', 'kegg_undir_uci', 'online_video', + 'wecs', 'mlr_knn_rng', 'query_agg_count', + 'sgemm', 'road_network', 'methane', 'poker'] #, 'susy', 'higgs'] + + +## Loop over reruns and datasets +for idx_indices in [0, 1, 2, 3, 4]: + for idx_dataset, name_dataset in enumerate(list_datasets): + + ## Load the hyperparameters + hyperparameter = dic_hyperparams[name_dataset] + + + ## Run everything + A_start, A_optimized, model, model_vkoga1, model_vkoga2, data, \ + array_concatenate, array_test_rmse_deep, dic_timings_2L = run_everything( + name_dataset, + hyperparameter.maxIter_vkoga, hyperparameter.N_points, + hyperparameter.noise_level, hyperparameter.reg_para_optim, hyperparameter.reg_para_vkoga, + hyperparameter.learning_rate, hyperparameter.n_epochs, + hyperparameter.batch_size, hyperparameter.n_folds, + flag_initialize_diagonal=hyperparameter.flag_initialize_diagonal, + flag_symmetric_A=hyperparameter.flag_symmetric, + flag_gaussian=hyperparameter.flag_gaussian, + flag_optim_verbose=False, + flag_std_vkoga=False, + idx_rerun=idx_indices) + + + ## Run cross-validation + array_eps, array_cv_f, array_cv_f_val, array_test_rmse_cv, list_timings_1L = \ + run_cross_validation( + name_dataset, # required only for hyperparameters + data, hyperparameter.maxIter_vkoga, + flag_gaussian=hyperparameter.flag_gaussian, + n_cross_val=hyperparameter.n_cross_val) + + + ## Store in matlab for beautiful tikzfigure plots + path_for_results = os.getcwd() + '/P36_Francesco_Emma/paper_experiments/results/' + os.makedirs(path_for_results, exist_ok=True) + io.savemat(path_for_results + name_dataset + '_{}'.format(idx_indices) + '.mat', + dict(array_concatenate=array_concatenate, + list_objectives=model.list_obj, + train_hist_deep_f=model_vkoga1.train_hist['f'], + train_hist_deep_f_val=model_vkoga1.train_hist['f val'], + train_hist_std_f=model_vkoga2.train_hist['f'], + train_hist_std_f_val=model_vkoga2.train_hist['f val'], + array_eps=array_eps, + array_cv_f=array_cv_f, + array_cv_f_val=array_cv_f_val, + array_test_rmse_deep=array_test_rmse_deep, + array_test_rmse_cv=array_test_rmse_cv, + dic_timings_2L=dic_timings_2L, + list_timings_1L=list_timings_1L)) + + # in Matlab: + # results = load('gong.mat') diff --git a/section_4.2_visualize.py b/section_4.2_visualize.py new file mode 100644 index 0000000..d286d58 --- /dev/null +++ b/section_4.2_visualize.py @@ -0,0 +1,115 @@ +# DATA-DRIVEN KERNEL DESIGNS FOR OPTIMIZED GREEDY SCHEMES: A MACHINE LEARNING PERSPECTIVE by +# TIZIAN WENZEL, FRANCESCO MARCHETTI, EMMA PERRACCHIONE +# Code related to the numerical experiment within Section 4.2. "Machine learning data +# sets", especially to produce Figure 5. 
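# A minimal, self-contained sketch of the quantity visualized below (it is not
# used by this script): for the optimized matrix A the cumulative share of its
# singular values is computed; if that curve reaches ~1 after only a few
# indices, A is effectively low-rank, i.e. the optimized kernel concentrates on
# a low-dimensional subspace of the input space.  The matrix in the sketch is
# an assumed, nearly rank-2 example chosen only for illustration.
def _sketch_singular_value_ratio():
    import numpy as np
    A = np.diag([3.0, 2.0, 1e-3, 1e-3, 1e-3])         # nearly rank-2 matrix
    s = np.sort(np.abs(np.linalg.svd(A, compute_uv=False)))[::-1]
    return np.cumsum(s) / np.sum(s)                   # roughly [0.60, 0.999, ...]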
+ + +import numpy as np +from matplotlib import pyplot as plt +from scipy import io +import os +import scipy +import utilities + + +## Some settings +list_datasets = ['fried', 'sarcos', 'protein', 'ct', 'diamonds', + 'stock', 'kegg_undir_uci', 'online_video', + 'wecs', 'mlr_knn_rng', 'query_agg_count', + 'sgemm', 'road_network', 'methane', 'poker'] #, 'susy', 'higgs'] + +basepath = utilities.get_basepath() +path_for_results = basepath + 'P36_Francesco_Emma/paper_experiments/results_5reruns/' + + +## Initialize dictionaries to store several quantities +# Write all results into a dictionary: +# dic_eigvals: ratio of cumsummed absolute singular values +# dic_accuracy_deep: test rmse of 2L model +# dic_accuracy_beststd: test rmse of best standard model +dic_singular_vals = {} +dic_accuracy_deep = {} +dic_accuracy_beststd = {} +for name_dataset in list_datasets: + dic_singular_vals[name_dataset] = {} + dic_accuracy_deep[name_dataset] = {} + dic_accuracy_beststd[name_dataset] = {} + + +## Read all file, perform computations, save results +flag_first_npy_file = True +for idx_file, file in enumerate(os.listdir(path_for_results)): + if file == 'README.md' or file == 'save': + continue + + name_dataset = '_'.join(file.split('_')[:-1]) + idx_rerun = int(file[-5]) + + # Compute and store singular value ratios - enforce that they are ordered! + results = scipy.io.loadmat(path_for_results + file) + A_final = results['array_concatenate'][:, :, -1] + + singular_vals = np.linalg.svd(A_final)[1] + abs_sing_vals = np.abs(singular_vals) + ordered_abs_sing_vals = np.sort(abs_sing_vals)[::-1] + + ratio = np.cumsum(ordered_abs_sing_vals) / np.sum(ordered_abs_sing_vals) + + dic_singular_vals[name_dataset][idx_rerun] = ratio + + # Compute and store accuracies + dic_accuracy_deep[name_dataset][idx_rerun] = results['array_test_rmse_deep'] + dic_accuracy_beststd[name_dataset][idx_rerun] = np.min(results['array_test_rmse_cv'], axis=0) + + +## Calculate ranking where 2L performs best to worst: Use the average mean of improvement for this +array_ratio = np.zeros(len(list_datasets)) + +for idx_dataset, name_dataset in enumerate(list_datasets): + + array_deep = np.stack([dic_accuracy_deep[name_dataset][idx_rerun] for idx_rerun in range(5)]) + array_beststd = np.stack([dic_accuracy_beststd[name_dataset][idx_rerun] for idx_rerun in range(5)]) + + ratio = np.mean(np.mean(array_deep, axis=0) / np.mean(array_beststd, axis=0)) + + array_ratio[idx_dataset] = ratio + +indices_sorted = np.argsort(array_ratio) + + +## Print the calculated ratio: This shows, when 2L is superior +for idx_sorted in indices_sorted: + print('{:20}'.format(list_datasets[idx_sorted]), np.round(array_ratio[idx_sorted], 5)) +indices_sorted = indices_sorted[:-3] # cut off last three datasets + + +## Visualization of the ratio of singular values compared to sum of all singular values +R = np.linspace(0, 1, int(1.0*len(list_datasets))) +array_color=plt.cm.hsv(R) + +dic_to_store = {} + +list_legend = [] +plt.figure(2) +plt.clf() +idx_color = len(indices_sorted) +for idx_sorted in indices_sorted[::-1]: + name_dataset = list_datasets[idx_sorted] + array = np.stack([dic_singular_vals[name_dataset][idx_rerun] for idx_rerun in range(5)]) + plt.errorbar(np.linspace(0, 1, array.shape[1]), np.mean(array, axis=0), yerr=np.std(array, axis=0), + color=array_color[idx_color, :]) + # list_legend.append('{} '.format(idx_color) + name_dataset) + list_legend.append(name_dataset) + idx_color -= 1 + + dic_to_store[name_dataset] = array +plt.legend(list_legend) + + +## Store in 
matlab for beautiful tikzfigure plots +# io.savemat('dic_singular_value_ratio.mat', dic_to_store) + + + + + diff --git a/section_4.3_compute.py b/section_4.3_compute.py new file mode 100644 index 0000000..74e6a3b --- /dev/null +++ b/section_4.3_compute.py @@ -0,0 +1,109 @@ +# DATA-DRIVEN KERNEL DESIGNS FOR OPTIMIZED GREEDY SCHEMES: A MACHINE LEARNING PERSPECTIVE by +# TIZIAN WENZEL, FRANCESCO MARCHETTI, EMMA PERRACCHIONE +# Code related to the numerical experiment within Section 4.3. "Stability of the kernel +# optimization", especially to produce the results for Figure 6. + +# Similar to 03_4_stability_investigations.py, but now also running the VKOGA after the kernel optimization. +# I ran this file on ic6, ic7, ic8 to compute the results which are collected in the folder results_stability. + +import os +import numpy as np +from scipy.stats import ortho_group # Requires version 0.18 of scipy +import pickle +import utilities + +from P36_Francesco_Emma.utilities.dataset_collection import Dataset +from P36_Francesco_Emma.utilities.hyperparameters import dic_hyperparams +from P36_Francesco_Emma.utilities.main_function import run_everything + + +## Some settings +list_nctrs = [int(np.round(nr)) for nr in np.logspace(np.log(10) / np.log(10), np.log(1000) / np.log(10), 10)] + +basepath = utilities.get_basepath() +path_for_results = basepath + 'P36_Francesco_Emma/paper_experiments/results_stability/' + + +flag_gaussian = False +n_reruns = 5 + +list_datasets = ['fried', 'sarcos', 'protein', 'ct', 'diamonds', + 'stock', 'kegg_undir_uci', 'online_video', + 'wecs', 'mlr_knn_rng', 'query_agg_count', + 'sgemm', 'road_network', 'methane', 'poker'] #, 'susy', 'higgs'] + + +## Loop over all datasets +for idx_dataset, name_dataset in enumerate(list_datasets): + + # Load dataset + dataset = Dataset() + X, y = dataset.get_data(name_dataset) + hyperparameter = dic_hyperparams[name_dataset] + + # list_A_start = [np.eye(X.shape[1])] + [ortho_group.rvs(dim=X.shape[1]) for _ in range(n_reruns - 1)] + list_A_start = [np.eye(X.shape[1]) for _ in range(n_reruns)] + + # Go only for a fixed set of indices - do not aim at error bars plot! + list_idx = [1, 2, 3, 4] + + ## Loop to compute and store all initial and final matrices for the different nfolds optimizations + dic_results = {} + for idx_index in list_idx: + dic_results[idx_index] = {} + + list_nfolds = [64, 32, 16, 8, 4] + assert len(list_nfolds) == len(list_A_start), 'list_nfolds is not of same length as list_A_start' + + ## Load the hyperparameters + hyperparameter = dic_hyperparams[name_dataset] + + ## Run everything for 2L + for nfold in list_nfolds: + + A_start, A_optimized, model, model_vkoga1, model_vkoga2, data, _, array_test_rmse_deep = run_everything( + name_dataset, + hyperparameter.maxIter_vkoga, hyperparameter.N_points, + hyperparameter.noise_level, hyperparameter.reg_para_optim, hyperparameter.reg_para_vkoga, + hyperparameter.learning_rate, hyperparameter.n_epochs, + hyperparameter.batch_size, + n_folds=nfold, + flag_initialize_diagonal=hyperparameter.flag_initialize_diagonal, + flag_symmetric_A=hyperparameter.flag_symmetric, + flag_gaussian=hyperparameter.flag_gaussian, + flag_optim_verbose=False, + flag_std_vkoga=False, + idx_rerun=idx_index) + + # Store the initial and the final matrix + dic_results[idx_index][nfold] = [A_start, A_optimized, model_vkoga1.train_hist['f'], + array_test_rmse_deep] + + + ## Run everything for 1L (there is only one!) 
--> nfold = 0 + A_start, A_optimized, _, model_vkoga1, _, _, _, array_test_rmse_deep, _ = run_everything( + name_dataset, + hyperparameter.maxIter_vkoga, hyperparameter.N_points, + hyperparameter.noise_level, hyperparameter.reg_para_optim, hyperparameter.reg_para_vkoga, + hyperparameter.learning_rate, n_epochs=0, + batch_size=hyperparameter.batch_size, + n_folds=list_nfolds[0], # 64 <-- does not matter as long as we use n_epochs=0 + flag_initialize_diagonal=hyperparameter.flag_initialize_diagonal, + flag_symmetric_A=hyperparameter.flag_symmetric, + flag_gaussian=hyperparameter.flag_gaussian, + flag_optim_verbose=False, + flag_std_vkoga=False, + idx_rerun=idx_index) + + # Store the initial and the final matrix + dic_results[idx_index][0] = [A_start, A_optimized, model_vkoga1.train_hist['f'], + array_test_rmse_deep] + + + ## Pickle store dic results + os.makedirs(path_for_results, exist_ok=True) + filename = 'dic_results_{}_'.format(idx_index) + name_dataset + '.pkl' + file_pickle = os.path.join(path_for_results, filename) + with open(file_pickle, 'wb') as f: + pickle.dump(dic_results, f) + diff --git a/section_4.3_visualize.py b/section_4.3_visualize.py new file mode 100644 index 0000000..2d2ce7d --- /dev/null +++ b/section_4.3_visualize.py @@ -0,0 +1,247 @@ +# DATA-DRIVEN KERNEL DESIGNS FOR OPTIMIZED GREEDY SCHEMES: A MACHINE LEARNING PERSPECTIVE by +# TIZIAN WENZEL, FRANCESCO MARCHETTI, EMMA PERRACCHIONE +# Code related to the numerical experiment within Section 4.3. "Stability of the kernel +# optimization", especially to produce the plots for Figure 6. + +# Evaluation and analysis of the results which are obtained from 04_1_stability_investigations.py. +# Only the wecs dataset is a bit weird: Here I do not understand what is going on: +# Only the first singular vectors are aligned, the other ones are not! + +import os +from scipy import io +import numpy as np +from scipy.stats import ortho_group # Requires version 0.18 of scipy +from matplotlib import pyplot as plt +import pickle +import scipy +import utilities + + +## Some settings +list_nctrs = [int(np.round(nr)) for nr in np.logspace(np.log(10) / np.log(10), np.log(1000) / np.log(10), 10)] + +basepath = utilities.get_basepath() +path_for_results = basepath + 'P36_Francesco_Emma/paper_experiments/results_stability/' + + +## First, collect results from all files in a common dictionary +dic_results = {} # dic_results --> name_dataset --> idx_index --> nfold --> [A_start, A_optimized, model_vkoga1.train_hist['f'], array_test_rmse_deep] +for idx_file, file in enumerate(os.listdir(path_for_results)): + idx_index = int(file.split('_')[2]) + name_dataset = ('_'.join(file.split('_')[3:])).split('.')[:-1][0] + print(name_dataset) + + # Load file + with open(path_for_results + file, 'rb') as f: + dic_test_preds = pickle.load(f) + + # Key name_dataset already available? 
+ if name_dataset not in dic_results: + dic_results[name_dataset] = {} + + # Enter all the data + for idx_index in dic_test_preds: + dic_results[name_dataset][idx_index] = dic_test_preds[idx_index] + + + +## Second, loop over name_dataset and compute singular value decompositions +dic_singvals = {} # dic_singvals --> name_dataset --> idx_index --> nfold --> [A_start, A_optimized, model_vkoga1.train_hist['f'], array_test_rmse_deep] +dic_singvecs = {} + +for idx_dataset, name_dataset in enumerate(dic_results): + + # if name_dataset != 'wecs' and name_dataset != 'ct' and name_dataset != 'sgemm': + # continue + + dic_singvals[name_dataset] = {} + dic_singvecs[name_dataset] = {} + + for idx_index in dic_results[name_dataset]: + + dic_singvals[name_dataset][idx_index] = {} + dic_singvecs[name_dataset][idx_index] = {} + + for nfold in dic_results[name_dataset][idx_index]: + + # Compute singular values of finally optimized matrix + A_start = dic_results[name_dataset][idx_index][nfold][0] + A_optimized = dic_results[name_dataset][idx_index][nfold][1] + + # Use singular value decomposition + u, singular_vals, vh = np.linalg.svd(A_optimized) # A == (u @ np.diag(s)) @ vh + indices = np.argsort(np.abs(singular_vals)) + indices = indices[::-1] + + singular_vals = singular_vals[indices] + singular_vecs = u[:, indices] + + dic_singvals[name_dataset][idx_index][nfold] = singular_vals + dic_singvecs[name_dataset][idx_index][nfold] = singular_vecs + + + +## Third, loop over name_dataset and compute quantities for plots and visualize them (average over the reruns) +max_dim = 15 # maximum dimension to which we want to check similarity + +nfold_ref = 64 # we will use this as a reference model! +list_idx = [0, 1, 2, 3, 4] + +for idx_dataset, name_dataset in enumerate(list(dic_results.keys())): + + # Skip several datasets + if name_dataset in ['methane', 'poker', 'protein']: + continue + if name_dataset not in ['kegg_undir_uci', 'fried', 'ct', 'mlr_knn_rng']: + continue + + max_dim1 = min([max_dim, dic_singvals[name_dataset][idx_index][nfold].shape[0]]) + + + ## Compute quantities for the subsequent plots + dic_alignment = {} + dic_singvalsarray = {} + + for idx_nfold, nfold in enumerate(list(dic_results[name_dataset][0].keys())): + + # if nfold == nfold_ref: + # continue # this is our reference model + + + # Average results over the different runs (i.e. the different training test splits) + list_list_similarities = [] + for idx_index in list_idx: + + list_similarity = [] + for dim in range(max_dim1): + array = scipy.linalg.subspace_angles(dic_singvecs[name_dataset][idx_index][nfold][:, :(dim + 1)], + dic_singvecs[name_dataset][idx_index][nfold_ref][:, :(dim + 1)]) + + # Maybe modify this measure here? 90 degress means orthogonality!! 
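                # scipy.linalg.subspace_angles returns the principal angles in radians,
                # sorted in descending order, so array[0] is the largest principal angle
                # between the two leading singular subspaces: 0 degrees means the
                # subspaces coincide, 90 degrees means some direction of one subspace
                # is orthogonal to the whole other subspace.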
+ similarity = array[0] + list_similarity.append(np.rad2deg(similarity)) + + list_list_similarities.append(list_similarity) + + array_angles = np.stack(list_list_similarities) + + # Write values to dic - to store it: Added "A" in the beginning because otherwise matlab raises an error + dic_alignment['A{}'.format(nfold)] = array_angles + dic_singvalsarray['A{}'.format(nfold)] = np.stack([dic_singvals[name_dataset][idx_index][nfold] for idx_index in dic_singvals[name_dataset]]) + + + ## Errorbar plot of subspace angles + plt.figure(800 + idx_dataset) + plt.clf() + list_legend = [] + for idx_nfold, nfold in enumerate(list(dic_results[name_dataset][0].keys())): + plt.errorbar(x=np.arange(1, 1 + max_dim1) + (-2 + idx_nfold) * .07, # shift of 0.07 otherwise errorbars overlap + y=np.mean(dic_alignment['A{}'.format(nfold)], axis=0), + yerr=np.std(dic_alignment['A{}'.format(nfold)], axis=0)) + list_legend.append('{} vs {}'.format(nfold_ref, nfold if not nfold == 0 else '1L')) + + plt.title(name_dataset + '(dim={}): Alignment of subspaces'.format(dic_singvals[name_dataset][idx_index][nfold].shape[0])) + plt.legend(list_legend) + + + ## Errorbar plot of singular values + plt.figure(900 + idx_dataset) + plt.clf() + list_legend = [] + for idx_nfold, nfold in enumerate(list(dic_results[name_dataset][0].keys())): + plt.errorbar(x=np.arange(1, 1 + dic_singvalsarray['A0'].shape[1]) + (-2 + idx_nfold) * .07, + y=np.mean(dic_singvalsarray['A{}'.format(nfold)], axis=0), + yerr=np.std(dic_singvalsarray['A{}'.format(nfold)], axis=0)) + list_legend.append('{} vs {}'.format(nfold_ref, nfold if not nfold == 0 else '1L')) + + plt.title(name_dataset + '(dim={}): Singular values'.format(dic_singvals[name_dataset][idx_index][nfold].shape[0])) + plt.legend(list_legend) + plt.yscale('log') + + + ## Store in matlab for beautiful tikzfigure plots + io.savemat('dic_stabilityalignment_' + name_dataset + '_nfolds.mat', dic_alignment) + io.savemat('dic_singvals_' + name_dataset + '_nfolds.mat', dic_singvalsarray) + + if idx_dataset > 20: + break + + + +# ============================================================================= +# ============ The following code is meant for some investigations ============ +# ============================================================================= + + + +## Single plot: Compute the distances between the largest eigenspaces. We use nfolds = 64 as reference! +max_dim = 15 # maximum dimension to which we want to check similarity +name_dataset = 'wecs' + +nfold_ref = 64 # we will use this as a reference model! +list_idx = [0, 1, 2, 3, 4] + +for idx_index in list_idx: + + plt.figure(110 + idx_index) + plt.clf() + list_legend = [] + + for nfold in dic_results[name_dataset][idx_index]: + if nfold == nfold_ref: + continue # this is our reference model + + list_similarity = [] + for dim in range(max_dim): + array_angles = scipy.linalg.subspace_angles(dic_singvecs[name_dataset][idx_index][nfold][:, :(dim + 1)], + dic_singvecs[name_dataset][idx_index][nfold_ref][:, :(dim + 1)]) + + # Maybe modify this measure here? 90 degress means orthogonality!! 
+ similarity = array_angles[0] + + list_similarity.append(np.rad2deg(similarity)) + + plt.plot(list(range(max_dim)), list_similarity) + list_legend.append('{} vs {}'.format(nfold_ref, nfold)) + + # energy_captured = np.sum(np.abs(dic_singvals[idx_index][idx_nfold][-10:])) / np.sum(np.abs(dic_singvals[idx_index][idx_nfold])) + # plt.title(name_dataset + ': energy captured within {} dims: {:.3f}%'.format(max_dim, 100*energy_captured)) + plt.title(name_dataset + '(dim={}): Alignment of subspaces'.format(dic_singvals[name_dataset][idx_index][nfold].shape[0])) + plt.legend(list_legend) + + +## Single plot: Plot the first few singular vectors +dic_colors = {0: 'k', 4: 'm', 8: 'g', 16: 'r', 32: 'b', 64: 'y'} +max_vectors = 3 # maximum dimension to which we want to check similarity +name_dataset = 'wecs' + +list_idx = [0, 1, 2, 3, 4] + +for idx_vector in range(max_vectors): + + plt.figure(1000 + idx_vector) + plt.clf() + list_legend = [] + + flag_legend = True + for idx_index in list_idx: + + for nfold in dic_results[name_dataset][idx_index]: + + if flag_legend: + list_legend.append(nfold) + + # ToDo: Flip if necessary + plt.plot(dic_singvecs[name_dataset][idx_index][nfold][:, idx_vector], color=dic_colors[nfold]) + + + flag_legend = False + + + # energy_captured = np.sum(np.abs(dic_singvals[idx_index][idx_nfold][-10:])) / np.sum(np.abs(dic_singvals[idx_index][idx_nfold])) + # plt.title(name_dataset + ': energy captured within {} dims: {:.3f}%'.format(max_dim, 100*energy_captured)) + plt.title(name_dataset + '(dim={}): Plot of singular vectors'.format(dic_singvals[name_dataset][idx_index][nfold].shape[0])) + plt.legend(list_legend) + + + diff --git a/utils/cv_rippa_ext.py b/utils/cv_rippa_ext.py new file mode 100644 index 0000000..d053604 --- /dev/null +++ b/utils/cv_rippa_ext.py @@ -0,0 +1,83 @@ +# Code copied from P07_ML_MORE/Utilities/cv_rippa_ext.py and slightly modified. 
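# Background for the two functions below (illustration only): for the linear
# system K c = y with (regularized) kernel matrix K, Rippa's rule states that
# the leave-one-out residual at point j equals c_j / (K^{-1})_{jj}; the
# extended scheme used here generalizes this to k folds, where for an index set
# J the fold residuals solve (K^{-1})[J, J] e_J = c_J.  The self-check below is
# an assumed demo (exponential kernel, random data) and is not called anywhere
# in this module.
def _demo_rippa_rule():
    import torch
    torch.manual_seed(0)
    X = torch.rand(20, 2, dtype=torch.float64)
    y = torch.rand(20, 1, dtype=torch.float64)
    K = torch.exp(-torch.cdist(X, X)) + 1e-8 * torch.eye(20, dtype=torch.float64)

    c = torch.linalg.solve(K, y)
    e_rippa = c / torch.diag(torch.inverse(K)).view(-1, 1)   # closed-form LOO residuals

    e_explicit = torch.zeros(20, 1, dtype=torch.float64)
    for j in range(20):                                       # brute-force LOO refits
        idx = [i for i in range(20) if i != j]
        c_loo = torch.linalg.solve(K[idx][:, idx], y[idx])
        e_explicit[j] = y[j] - K[j, idx] @ c_loo

    assert torch.allclose(e_rippa, e_explicit)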
+ +import torch + + +def compute_cv_loss_via_rippa_ext(kernel, X, y, n_folds, reg_for_matrix_inversion): + """ + Implementation with the need to provide a kernel and points + """ + + # Some precomputations + kernel_matrix = kernel(X, X) + reg_for_matrix_inversion * torch.eye(X.shape[0]) + inv_kernel_matrix = torch.inverse(kernel_matrix) + coeffs = torch.linalg.solve(kernel_matrix, y) #[0] + + # Some initializations and preparations: It is required that n_folds divides y.shape[0] without remainder + array_error = torch.zeros(y.shape[0], 1) + n_per_fold = int(y.shape[0] / n_folds) + indices = torch.arange(0, y.shape[0]).view(n_per_fold, n_folds) + + # Standard Rippa's scheme + if n_folds == y.shape[0]: + array_error = coeffs / torch.diag(inv_kernel_matrix) + + # Extended Rippa's scheme + else: + for j in range(n_folds): + # inv_kernel_matrix_loc = inv_kernel_matrix[indices[:, j], indices[:, j]] + inv_kernel_matrix_loc1 = inv_kernel_matrix[indices[:, j], :] + inv_kernel_matrix_loc = inv_kernel_matrix_loc1[:, indices[:, j]] + + array_error[j * n_per_fold: (j+1) * n_per_fold, 0] = \ + (torch.solve(coeffs[indices[:, j]], inv_kernel_matrix_loc)[0]).view(-1) + + cv_error_sq = torch.sum(array_error ** 2) / array_error.shape[0] + + return cv_error_sq, array_error + + +def compute_cv_loss_via_rippa_ext_2(kernel_matrix, y, n_folds, reg_for_matrix_inversion): + """ + Implementation without the need to provide a kernel and points: Simply provide the kernel matrix + """ + + # Some precomputations + kernel_matrix_reg = kernel_matrix + reg_for_matrix_inversion * torch.eye(kernel_matrix.shape[0]) + inv_kernel_matrix = torch.inverse(kernel_matrix_reg) + coeffs = torch.linalg.solve(kernel_matrix_reg, y) #[0] + + # Some initializations and preparations: It is required that n_folds divides y.shape[0] without remainder + array_error = torch.zeros(y.shape[0], 1) + n_per_fold = int(y.shape[0] / n_folds) + indices = torch.arange(0, y.shape[0]).view(n_per_fold, n_folds) + + # Standard Rippa's scheme + if n_folds == y.shape[0]: + array_error = coeffs / torch.diag(inv_kernel_matrix).view(-1,1) + + # Extended Rippa's scheme + else: + for j in range(n_folds): + # inv_kernel_matrix_loc = inv_kernel_matrix[indices[:, j], indices[:, j]] + inv_kernel_matrix_loc1 = inv_kernel_matrix[indices[:, j], :] + inv_kernel_matrix_loc = inv_kernel_matrix_loc1[:, indices[:, j]] + + # array_error[j * n_per_fold: (j+1) * n_per_fold, 0] = \ + # (torch.solve(coeffs[indices[:, j]], inv_kernel_matrix_loc)[0]).view(-1) + array_error[j * n_per_fold: (j+1) * n_per_fold, 0] = \ + (torch.linalg.solve(inv_kernel_matrix_loc, coeffs[indices[:, j]])).view(-1) + + # list_for_block = [] + # for j in range(n_folds): + # inv_kernel_matrix_loc1 = inv_kernel_matrix[j*n_per_fold : (j+1)*n_per_fold, :] + # inv_kernel_matrix_loc = inv_kernel_matrix_loc1[:, j*n_per_fold : (j+1)*n_per_fold] + # list_for_block.append(inv_kernel_matrix_loc) + # + # from scipy.linalg import block_diag + # block_matrix = block_diag(list_for_block) # blockmatrix, each block is of size n_per_fold x n_per_fold! 
+ # array_error2 = torch.linalg.solve(block_matrix, coeffs) + + cv_error_sq = torch.sum(array_error ** 2) / array_error.numel() + + return cv_error_sq, array_error diff --git a/utils/dataset_collection.py b/utils/dataset_collection.py new file mode 100644 index 0000000..58a1fab --- /dev/null +++ b/utils/dataset_collection.py @@ -0,0 +1,291 @@ +import numpy as np +import pandas as pd +import os +import h5py +import math +import scipy.io as spio +from P32_just_interpolate.utilities.utils import load_mnist_pair, get_basepath +import utilities + +class Dataset(): + """ + Implementation of several toy and real world data sets + """ + + def __init__(self, N_points=10000): + + self.N_points = N_points + self.X = None + self.y = None + + self.dic_dataset = { + 'example_2d': (lambda x: .02 * (x[:, [0]] + x[:, [1]]) ** 2 + np.sin(2 * math.pi * (x[:, [0]] - x[:, [1]])), 2), + 'example_2d_tiz': (lambda x: x[:, [0]] + (x[:, [0]] + .1 * x[:, [1]]) ** 2, 2), + 'example_2d_radial': (lambda x: np.exp(-4 * np.sum((x - .5 * np.ones_like(x)) ** 2, axis=1, keepdims=True)), 2), + 'example_2d_active_1': (lambda x: np.abs(x[:, [0]] - 2*x[:, [1]]), 2), + 'example_2d_franke': (lambda x: 0.75 * np.exp(-(9 * x[:, [0]] - 2) ** 2 / 4 - (9 * x[:, [1]]- 2) ** 2 / 4) + + 0.75 * np.exp(-(9 * x[:, [0]] + 1) ** 2 / 49 - (9 * x[:, [1]] + 1) / 10) + + 0.5 * np.exp(-(9 * x[:, [0]] - 7) ** 2 / 4 - (9 * x[:, [1]] - 3) ** 2 / 4) + - 0.2 * np.exp(-(9 * x[:, [0]] - 4) ** 2 - (9 * x[:, [1]] - 7) ** 2), 2), + 'example_3d_active_1': (lambda x: x[:, [0]] + 2 * x[:, [1]], 3), + 'example_3d_active_2': (lambda x: (x[:, [0]] + x[:, [1]]) ** 2 + np.sin(2 * math.pi * (x[:, [0]] - x[:, [1]])), 3), + 'example_10d_active': (lambda x: x[:, [0]] * (x[:, [1]] - x[:, [2]]) ** 3 + 2 * np.sin(math.pi * (x[:, [1]] - x[:, [2]])) - np.exp(-2 * x[:, [2]]), 10), + 'example_10d_vanishing': (lambda x: x @ ((np.arange(10) + 1.0) ** (-2)).reshape(-1, 1), 10), + 'example_10d_radial': (lambda x: np.exp(-4 * np.sum((x[:, :5] - .5 * np.ones_like(x[:, :5])) ** 2, axis=1, keepdims=True)), 10), + 'example_5d_radial': (lambda x: np.exp(-4 * np.sum((x[:, :] - .5 * np.ones_like(x[:, :])) ** 2, axis=1, keepdims=True)), 5), + 'example_5d_active': (lambda x: (x[:, [1]] - x[:, [2]]), 5), + 'example_5d_faster_conv': (lambda x: np.exp(-4 * np.sum(x[:, :] - .5 * np.ones_like(x[:, :]), axis=1, keepdims=True) ** 2), 5), + 'example_6d_kink': (lambda x: (np.exp(-4 * np.sum((x[:, :] - .5 * np.ones_like(x[:, :])) ** 2, axis=1, keepdims=True)) + 2 * np.abs(x[:, [0]] - .5)), 6), + 'example_7d_semiactive': (lambda x: np.exp(-4 * np.sum((x[:, :] - .5 * np.ones_like(x[:, :])) ** 2, axis=1, keepdims=True)) + + np.exp(-9 * np.sum((x[:, :2] - .3 * np.ones_like(x[:, :2])) ** 2, axis=1, keepdims=True)), 7), + # 'example_winkle': (lambda x: ((np.sin(x[:, [0]]) * np.exp(x[:, [1]]) + x[:, [2]]) ** ((x[:, [3]] + 1) ** 2)) + # / ((x[:, [3]] + 1) ** 2), 5), + } + + + def get_data(self, name_dataset): + + X, y = None, None + + if name_dataset in self.dic_dataset.keys(): + fcn, dim = self.dic_dataset[name_dataset] + + X = np.random.rand(self.N_points, dim) + y = fcn(X) + + elif name_dataset == 'example_2d_clusters': + X, y = self.example_2d_clusters() + elif name_dataset[:24] == 'example_highdim_clusters': + dim = int(name_dataset.split('_')[-1]) + X, y = self.example_highdim_clusters(dim) + elif name_dataset[:10] == 'toyexample': + X, y = self.toyexample(name_dataset) + else: + if name_dataset[:13] == 'example_mnist': + X, y = self.example_mnist(name_dataset) + elif name_dataset == 'example_TUD': + 
X, y = self.example_TUD() + elif name_dataset in ['ct', 'diamonds', 'fried', 'kegg_undir_uci', + 'methane', 'mlr_knn_rng', 'online_video', 'poker', + 'protein', 'query_agg_count', 'road_network', 'sarcos', 'sgemm', + 'stock', 'wecs']: + X, y = self.example_holzmuller(name_dataset) + elif name_dataset in ['flights', 'higgs', 'susy', 'taxi', 'timit']: + X, y = self.falkon_datasets(name_dataset) + elif name_dataset in ['uci_airfoil_self_noise', 'uci_CCPP']: + X, y = self.example_uci(name_dataset) + + assert X is not None, 'Bug in get_data!' + assert y is not None, 'Bug in get_data!' + + return X, y + + def example_2d_clusters(self): + + X1 = np.random.randn(100, 2) + X2 = .1 * np.random.randn(500, 2) + np.array([.5, .5]) + X3 = .2 * np.random.randn(500, 2) + np.array([-.5, -.5]) + + f_func = lambda x: 1 / (1 + np.abs(x[:, 0] - .5)) + + X = np.concatenate([X1, X2, X3], axis=0) + y = f_func(X) + + return X, y + + def example_highdim_clusters(self, dim): + # In the meeting with Antoine and Giacomo we cam up with a hypothesis when greedy works better. + + # X1 = np.random.rand(1000, dim) + # X2 = .1 * np.random.rand(5000, dim) + .5 * np.zeros((1, dim)) + # X3 = .2 * np.random.rand(5000, dim) - .5 * np.zeros((1, dim)) + # X = np.concatenate([X1, X2, X3], axis=0) + # + # y = np.ones((X.shape[0], 1)) + # y[X[:, 0] > .5] = -1 + + N_points = self.N_points + + # Example 1: Peak at center, then decay. randn distribution. + # X = np.random.randn(N_points, dim) + # center = .5 * np.ones((1, dim)) + # y = 1 / (1 + 5 * np.linalg.norm(X - center, axis=1, keepdims=True)) + # y = y + 1e-3 * np.random.randn(y.shape[0], 1) + + + # Example2: Peak at center, then decay. sparse randn distribution and two clusters. + X1 = np.random.randn(int(.1*N_points), dim) + X2 = .1 * np.random.rand(int(.45*N_points), dim) + .5 * np.ones((1, dim)) + X3 = .2 * np.random.rand(int(.45*N_points), dim) - .5 * np.ones((1, dim)) + X = np.concatenate([X1, X2, X3], axis=0) + + center = .5 * np.ones((1, dim)) + y = 1 / (1 + 5 * np.linalg.norm(X - center, axis=1, keepdims=True)) + y = y + 1e-3 * np.random.randn(y.shape[0], 1) + + return X, y + + + def example_mnist(self, name_dataset): + """MNIST dataset""" + + X_train, X_test, y_train, y_test = \ + load_mnist_pair(get_basepath(), + [int(name_dataset[-2]), int(name_dataset[-1])]) + + X = np.concatenate([X_train, X_test], axis=0) + y = np.concatenate([y_train, y_test], axis=0).reshape(-1, 1) + + return X, y + + def example_uci(self, name_dataset): + """Some UCI datasets""" + + # path = '/usr/local/home/wenzeltn/deepkernel-pytorch2/data/UCI/' + path = '/home/wenzeltn/local_home/deepkernel-pytorch2/data/UCI/' + + if name_dataset == 'uci_airfoil_self_noise': + data = np.loadtxt(path + 'airfoil_self_noise.dat', unpack=True).transpose() + + X = data[:, :5] + y = data[:, [5]] + + X = (X - X.mean(axis=0, keepdims=True)) / (X.std(axis=0, keepdims=True) + 1e-30) + y = (y - y.mean(axis=0, keepdims=True)) / (y.std(axis=0, keepdims=True) + 1e-30) + + if name_dataset == 'uci_CCPP': + import pandas as pd + data = pd.read_excel(path + 'CCPP/' + 'Folds5x2_pp.xlsx').to_numpy() + + X = data[:, :4] + y = data[:, [4]] + + X = (X - X.mean(axis=0, keepdims=True)) / (X.std(axis=0, keepdims=True) + 1e-30) + y = (y - y.mean(axis=0, keepdims=True)) / (y.std(axis=0, keepdims=True) + 1e-30) + + return X, y + + def example_TUD(self): + """Felix Döppel data""" + + path = '/home/wenzeltn/local_home/deepkernel-pytorch2/P01_DK_optimization/data/' + # path = 
'/usr/local/home/wenzeltn/deepkernel-pytorch2/P01_DK_optimization/data/' + mat_train = spio.loadmat(path + 'DataTUDKinetics.mat', squeeze_me=True) + # mat_test = spio.loadmat(path + 'TestTUDKinetics.mat', squeeze_me=True) + + X = mat_train['input'] + y = mat_train['output'].reshape(-1, 1) + + return X, y + + def example_holzmuller(self, ds_name): + """Data set from David Holzmüller paper of batch active learning.""" + + if os.path.exists('/usr/local/home/wenzeltn'): # anm03 + path = '/usr/local/home/wenzeltn/deepkernel-pytorch2/data/data_holzmuller/' + elif os.path.exists('/usr/local/storage/wenzeltn'): # ianscluster + path = '/usr/local/storage/wenzeltn/deepkernel-pytorch2/data/data_holzmuller/' + elif os.path.exists('/home/wenzeltn/'): # laptop + path = '/home/wenzeltn/local_home/deepkernel-pytorch2/data/data_holzmuller/' + + X = np.load(path + ds_name + '/X.npy') + y = np.load(path + ds_name + '/y.npy') + + # Standardization of input according to David H (reduce effect of outliers) + X = (X - X.mean(axis=0, keepdims=True)) / (X.std(axis=0, keepdims=True) + 1e-30) + X = 5 * np.tanh(0.2 * X) + + y = (y - y.mean(axis=0, keepdims=True)) / (y.std(axis=0, keepdims=True) + 1e-30) + + return X, y + + def toyexample(self, ds_name): + """Toy datasets from Tizian created on 04.01.23.""" + + basepath = utilities.get_basepath() + path = basepath + 'data/TOY_selfcreated/' + + X = np.load(path + ds_name + 'X.npy') + y = np.load(path + ds_name + 'y.npy') + + # Standardization of input according to David H (reduce effect of outliers) + X = (X - X.mean(axis=0, keepdims=True)) / (X.std(axis=0, keepdims=True) + 1e-30) + X = 5 * np.tanh(0.2 * X) + + y = (y - y.mean(axis=0, keepdims=True)) / (y.std(axis=0, keepdims=True) + 1e-30) + + return X, y + + + def falkon_datasets(self, ds_name): + """Datasets from FALKON paper.""" + + path = None + if os.path.exists('/usr/local/home/wenzeltn'): # anm03 + path = '/usr/local/home/wenzeltn/deepkernel-pytorch2/data/FALKON_data/' + elif os.path.exists('/usr/local/storage/wenzeltn'): # ianscluster + path = '/usr/local/storage/wenzeltn/deepkernel-pytorch2/data/FALKON_data/' + elif os.path.exists('/home/wenzeltn/'): + pass + + filename = None + if ds_name == 'susy' or ds_name == 'higgs': + if ds_name == 'susy': + filename = 'Susy.mat' + else: + filename = 'Higgs.mat' + + with h5py.File(path + filename, "r") as h5py_file: + arr = np.asarray(h5py_file['X'], dtype=np.float32).T + X = arr[:, 1:] + y = arr[:, 0].reshape(-1, 1) + + # Preprocess input + mtr = np.mean(X, axis=0, dtype=np.float64, keepdims=True).astype(X.dtype) + vtr = np.var(X, axis=0, dtype=np.float64, ddof=1, keepdims=True).astype(X.dtype) + + X -= mtr + X /= vtr + + # Preprocess outputs + y = y * 2 - 1 + + + elif ds_name == 'timit': # num_train_samples = 1124823 + pass + # ToDo: Does not work, dataset probably damaged + + # filename = 'TIMIT.mat' + # + # with h5py.File(path + filename, 'r') as h5py_file: + # dtype = np.float32 + # Xtr = np.array(h5py_file['Xtr'], dtype=dtype) + # Xts = np.array(h5py_file['Xts'], dtype=dtype) + # Ytr = np.array(h5py_file['Ytr'], dtype=dtype).reshape((-1,)) + # Yts = np.array(h5py_file['Yts'], dtype=dtype).reshape((-1,)) + # X = np.concatenate((Xtr, Xts), axis=0) + # y = np.concatenate((Ytr, Yts), axis=0) + # + # + # # + # # f = spio.loadmat(path + filename) + # # dtype = np.float32 + # # Xtr = np.array(f['Xtr'], dtype=dtype) + # # Xts = np.array(f['Xts'], dtype=dtype) + # # Ytr = np.array(f['Ytr'], dtype=dtype).reshape((-1,)) + # # Yts = np.array(f['Yts'], 
dtype=dtype).reshape((-1,)) + # # X = np.concatenate((Xtr, Xts), axis=0) + # # y = np.concatenate((Ytr, Yts), axis=0) + + # elif ds_name == 'flights': + # filename = 'flights.csv' + # # Not implemented, because I have a .csv file, Giacomo uses an .hdf5 file + # + # elif ds_name == 'taxi': + # pass + # # Not implemented because super large + + # ToDo: Not implemented: Taxi since very large, flights only in .csv, timit is broken + + return X, y + diff --git a/utils/hyperparameters.py b/utils/hyperparameters.py new file mode 100644 index 0000000..02469ee --- /dev/null +++ b/utils/hyperparameters.py @@ -0,0 +1,163 @@ + +class example_func_approx(): + # No noise will be added, because we truly want to optimize to a function. + + maxIter_vkoga = 250 + N_points = 50000 + noise_level = 0 + + reg_para_optim = 1e-5 # for kernel optimization + reg_para_vkoga = 0 # for running VKOGA + learning_rate = 5e-3 + n_epochs = 25 + batch_size = 64 + n_folds = None + + flag_initialize_diagonal = True + flag_symmetric = False + flag_gaussian = False + + n_cross_val = 10 + shape_para = 1 + k_matern = 0 + + +class example_2d(example_func_approx): + pass + +class example_10d_vanishing(example_func_approx): + flag_gaussian = True + +# class example_toy(): +# maxIter_vkoga = 250 +# N_points = 10000 +# noise_level = 1e-3 +# +# reg_para_optim = 1e-5 # for kernel optimization +# reg_para_vkoga = 0 # for running VKOGA +# learning_rate = 5e-3 +# n_epochs = 25 +# batch_size = 64 +# n_folds = None +# +# flag_initialize_diagonal = True +# flag_symmetric = False +# flag_gaussian = False +# +# n_cross_val = 10 +# shape_para = 1 +# k_matern = 0 + + +class example_TUD(): + maxIter_vkoga = 500 + N_points = None + noise_level = 0 + + reg_para_optim = 1e-5 # for kernel optimization + reg_para_vkoga = 0 # for running VKOGA + learning_rate = 5e-3 + n_epochs = 50 + batch_size = 64 + n_folds = None + + flag_initialize_diagonal = True + flag_symmetric = False + flag_gaussian = True + + n_cross_val = 10 + shape_para = 1 + k_matern = 0 + + +class example_holzmuller(): + maxIter_vkoga = 1000 + N_points = None + noise_level = 0 + + reg_para_optim = 1e-3 # for kernel optimization + reg_para_vkoga = 1e-4 # for running VKOGA + learning_rate = 5e-3 + n_epochs = 25 + batch_size = 64 + n_folds = None + + flag_initialize_diagonal = True + flag_symmetric = False + flag_gaussian = False + + n_cross_val = 10 + shape_para = 1 / 5 + k_matern = 0 + +class example_holzmuller_few_epochs(example_holzmuller): + n_epochs = 10 + + + +class example_mnist(example_holzmuller): + n_epochs = 5 + maxIter_vkoga = 200 + +class example_airfoil(example_holzmuller): + n_epochs = 25 + learning_rate = 2e-2 + maxIter_vkoga = 500 + k_matern = 0 + +class example_CCPP(example_holzmuller): + n_epochs = 25 + learning_rate = 2e-2 + maxIter_vkoga = 500 + k_matern = 0 + + + +dic_hyperparams = { + 'example_2d': example_func_approx(), + 'example_2d_tiz': example_func_approx(), + 'example_2d_radial': example_func_approx(), + 'example_2d_active_1': example_func_approx(), + 'example_2d_franke': example_func_approx(), + 'example_3d_active_1': example_func_approx(), + 'example_3d_active_2': example_func_approx(), + 'example_10d_active': example_func_approx(), + 'example_10d_vanishing': example_func_approx(), + 'example_10d_radial': example_func_approx(), + 'example_5d_radial': example_func_approx(), + 'example_5d_active': example_func_approx(), + 'example_5d_faster_conv': example_func_approx(), + 'example_6d_kink': example_func_approx(), + 'example_7d_semiactive': example_func_approx(), 
+ 'example_winkle': example_func_approx(), + 'example_TUD': example_TUD(), + + + 'ct': example_holzmuller_few_epochs(), + 'diamonds': example_holzmuller_few_epochs(), + 'fried': example_holzmuller_few_epochs(), + 'kegg_undir_uci': example_holzmuller_few_epochs(), + 'methane': example_holzmuller_few_epochs(), + 'mlr_knn_rng': example_holzmuller_few_epochs(), + 'online_video': example_holzmuller_few_epochs(), + 'poker': example_holzmuller_few_epochs(), + 'protein': example_holzmuller_few_epochs(), + 'query_agg_count': example_holzmuller_few_epochs(), + 'road_network': example_holzmuller_few_epochs(), + 'sarcos': example_holzmuller_few_epochs(), + 'sgemm': example_holzmuller_few_epochs(), + 'stock': example_holzmuller_few_epochs(), + 'wecs': example_holzmuller_few_epochs(), + + 'susy': example_holzmuller_few_epochs(), + 'higgs': example_holzmuller_few_epochs(), + + + 'example_mnist_01': example_mnist(), + 'example_mnist_34': example_mnist(), + + 'uci_airfoil_self_noise': example_airfoil(), + 'uci_CCPP': example_CCPP(), + +} + diff --git a/utils/kernels.py b/utils/kernels.py new file mode 100644 index 0000000..0d22347 --- /dev/null +++ b/utils/kernels.py @@ -0,0 +1,283 @@ +#!/usr/bin/env python3 + +from abc import ABC, abstractmethod +from scipy.spatial import distance_matrix +import numpy as np +import matplotlib.pyplot as plt + +# Abstract kernel +class Kernel(ABC): + @abstractmethod + def __init__(self): + super().__init__() + + @abstractmethod + def eval(self, x, y): + pass + + def eval_prod(self, x, y, v, batch_size=100): + N = x.shape[0] + num_batches = int(np.ceil(N / batch_size)) + mat_vec_prod = np.zeros((N, 1)) + for idx in range(num_batches): + idx_begin = idx * batch_size + idx_end = (idx + 1) * batch_size + A = self.eval(x[idx_begin:idx_end, :], y) + mat_vec_prod[idx_begin:idx_end] = A @ v + return mat_vec_prod + + @abstractmethod + def diagonal(self, X): + pass + + @abstractmethod + def __str__(self): + pass + + @abstractmethod + def set_params(self, params): + pass + +# Abstract RBF +class RBF(Kernel): + @abstractmethod + def __init__(self): + super(RBF, self).__init__() + + def eval(self, x, y): + return self.rbf(self.ep, distance_matrix(np.atleast_2d(x), np.atleast_2d(y))) + + def diagonal(self, X): + return np.ones(X.shape[0]) * self.rbf(self.ep, 0) + + def __str__(self): + return self.name + ' [gamma = %2.2e]' % self.ep + + def set_params(self, par): + self.ep = par + +# Implementation of concrete RBFs +class Gaussian(RBF): + def __init__(self, ep=1): + self.ep = ep + self.name = 'gauss' + self.rbf = lambda ep, r: np.exp(-(ep * r) ** 2) + +class GaussianTanh(RBF): + def __init__(self, ep=1): + self.ep = ep + self.name = 'gauss_tanh' + self.rbf = lambda ep, r: np.exp(-(ep * np.tanh(r)) ** 2) + +class IMQ(RBF): + def __init__(self, ep=1): + self.ep = ep + self.name = 'imq' + self.rbf = lambda ep, r: 1. 
/ np.sqrt(1 + (ep * r) ** 2) + +class Matern(RBF): + def __init__(self, ep=1, k=0): + self.ep = ep + if k == 0: + self.name = 'mat0' + self.rbf = lambda ep, r : np.exp(-ep * r) + elif k == -1: + self.name = 'derivative kernel ob quadratic matern' + self.rbf = lambda ep, r: np.exp(-r) * (r**2 - (2 * 1 + 3) * r + 1 ** 2 + 2 * 1) + elif k == 1: + self.name = 'mat1' + self.rbf = lambda ep, r: np.exp(-ep * r) * (1 + ep * r) + elif k == 2: + self.name = 'mat2' + self.rbf = lambda ep, r: np.exp(-ep * r) * (3 + 3 * ep * r + 1 * (ep * r) ** 2) + elif k == 3: + self.name = 'mat3' + self.rbf = lambda ep, r: np.exp(-ep * r) * (15 + 15 * ep * r + 6 * (ep * r) ** 2 + 1 * (ep * r) ** 3) + elif k == 4: + self.name = 'mat4' + self.rbf = lambda ep, r: np.exp(-ep * r) * (105 + 105 * ep * r + 45 * (ep * r) ** 2 + 10 * (ep * r) ** 3 + 1 * (ep * r) ** 4) + elif k == 5: + self.name = 'mat5' + self.rbf = lambda ep, r: np.exp(-ep * r) * (945 + 945 * ep * r + 420 * (ep * r) ** 2 + 105 * (ep * r) ** 3 + 15 * (ep * r) ** 4 + 1 * (ep * r) ** 5) + elif k == 6: + self.name = 'mat6' + self.rbf = lambda ep, r: np.exp(-ep * r) * (10395 + 10395 * ep * r + 4725 * (ep * r) ** 2 + 1260 * (ep * r) ** 3 + 210 * (ep * r) ** 4 + 21 * (ep * r) ** 5 + 1 * (ep * r) ** 6) + elif k == 7: + self.name = 'mat7' + self.rbf = lambda ep, r: np.exp(-ep * r) * (135135 + 135135 * ep * r + 62370 * (ep * r) ** 2 + 17325 * (ep * r) ** 3 + 3150 * (ep * r) ** 4 + 378 * (ep * r) ** 5 + 28 * (ep * r) ** 6 + 1 * (ep * r) ** 7) + else: + self.name = None + self.rbf = None + raise Exception('This Matern kernel is not implemented') + +class Wendland(RBF): + def __init__(self, ep=1, k=0, d=1): + self.ep = ep + self.name = 'wen_' + str(d) + '_' + str(k) + l = np.floor(d / 2) + k + 1 + if k == 0: + p = lambda r: 1 + elif k == 1: + p = lambda r: (l + 1) * r + 1 + elif k == 2: + p = lambda r: (l + 3) * (l + 1) * r ** 2 + 3 * (l + 2) * r + 3 + elif k == 3: + p = lambda r: (l + 5) * (l + 3) * (l + 1) * r ** 3 + (45 + 6 * l * (l + 6)) * r ** 2 + (15 * (l + 3)) * r + 15 + elif k == 4: + p = lambda r: (l + 7) * (l + 5) * (l + 3) * (l + 1) * r ** 4 + (5 * (l + 4) * (21 + 2 * l * (8 + l))) * r ** 3 + (45 * (14 + l * (l + 8))) * r ** 2 + (105 * (l + 4)) * r + 105 + else: + raise Exception('This Wendland kernel is not implemented') + c = np.math.factorial(l + 2 * k) / np.math.factorial(l) + e = l + k + self.rbf = lambda ep, r: np.maximum(1 - ep * r, 0) ** e * p(ep * r) / c + + + # Polynomial kernels +class Polynomial(Kernel): + def __init__(self, a=0, p=1): + self.a = a + self.p = p + + def eval(self, x, y): + return (np.atleast_2d(x) @ np.atleast_2d(y).transpose() + self.a) ** self.p + + def diagonal(self, X): + return ((np.linalg.norm(X, axis=1)**2 + self.a) ** self.p) #[:, None] + + def __str__(self): + return 'polynomial' + ' [a = %2.2e, p = %2.2e]' % (self.a, self.p) + + def set_params(self, par): + self.a = par[0] + self.p = par[1] + + +# Polynomial kernels +class BrownianBridge(Kernel): + def __init__(self): + super().__init__() + self.name = 'Brownian Bridge' + + def eval(self, x, y): + return np.minimum(np.atleast_2d(x), np.transpose(np.atleast_2d(y))) - np.atleast_2d(x) * np.transpose(np.atleast_2d(y)) + + def diagonal(self, X): + return X[:, 0] - X[:, 0] ** 2 + + def __str__(self): + return 'Brownian Bridge kernel' + + def set_params(self, par): + pass + +class BrownianMotion(Kernel): + def __init__(self): + super().__init__() + self.name = 'Brownian Motion' + + def eval(self, x, y): + return np.minimum(np.atleast_2d(x), np.transpose(np.atleast_2d(y))) + + 
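    # For the Brownian motion kernel k(x, y) = min(x, y) the diagonal is
    # k(x, x) = min(x, x) = x, so diagonal() can return the 1D inputs themselves.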
def diagonal(self, X): + + + return X.reshape(-1) + + def __str__(self): + return 'Brownian Motion kernel' + + def set_params(self, par): + pass + +# Convolution kernel for Wendland k=0 kernel max(0, 1-|x-y|) on [0,1]. For calculation see 11.02.23\A1. +class ConvKernelWendland(Kernel): + def __init__(self): + super().__init__() + self.name = 'Convolution Kernel Wendland k=0' + + self.func_util = lambda x, y: 1/3 * (-x**3 + 3*x**2*y - 3*x**2 - 3*x*y**2 + 3*x*y + 3/2*x + y**3 - 3*y**2 + 3/2*y + 1) + + def eval(self, x, y): + x = np.atleast_2d(x) + y = np.atleast_2d(y) + + A1 = self.func_util(x, y.transpose()) + A2 = self.func_util(y, x.transpose()).transpose() + + # Calculate matrix of logicals to check where x < y + diff = x - y.transpose() + M = diff <= 0 + + return A1 * M + A2 * ~M + + def diagonal(self, X): + return 1/3 + X[:, 0] - X[:, 0] ** 2 + + def __str__(self): + return 'Convolution Kernel Wendland k=0' + + def set_params(self, par): + pass + + +# Tensor product kernels +class TensorProductKernel(Kernel): + def __init__(self, kernel): + super().__init__() + + self.kernel_1D = kernel + self.name = self.kernel_1D.name + + def eval(self, x, y): + + x = np.atleast_2d(x) + y = np.atleast_2d(y) + + assert x.shape[1] == y.shape[1], 'Dimension do not match' + + array_matrix = np.ones((x.shape[0], y.shape[0])) + + for idx_dim in range(x.shape[1]): + array_matrix = array_matrix * self.kernel_1D.eval(x[:, [idx_dim]], y[:, [idx_dim]]) + + return array_matrix + + def diagonal(self, X): + + X = np.atleast_2d(X) + + array_diagonal = np.ones(X.shape[0]) + + for idx_dim in range(X.shape[1]): + array_diagonal *= self.kernel_1D.diagonal(X[:, [idx_dim]]) + + return array_diagonal + + def __str__(self): + return 'Tensor product kernel for ' + self.name + + def set_params(self, par): + pass + +# A demo usage +def main(): + ker = Gaussian() + + x = np.linspace(-1, 1, 100)[:, None] + y = np.matrix([0]) + A = ker.eval(x, y) + + + fig = plt.figure(1) + fig.clf() + ax = fig.gca() + ax.plot(x, A) + ax.set_title('A kernel: ' + str(ker)) + fig.show() + + +if __name__ == '__main__': + main() + + + diff --git a/utils/main_function.py b/utils/main_function.py new file mode 100644 index 0000000..ea871ee --- /dev/null +++ b/utils/main_function.py @@ -0,0 +1,285 @@ + + +from P36_Francesco_Emma.models.optimized_kernel import OptimizedKernel +from P36_Francesco_Emma.utilities.dataset_collection import Dataset +from P36_Francesco_Emma.utilities.hyperparameters import dic_hyperparams + +from vkoga import tkernels, kernels +import torch +from matplotlib import pyplot as plt +import numpy as np +import time + +from sklearn.model_selection import train_test_split +from vkoga.kernels import Matern +from vkoga.vkoga import VKOGA +import os + +from datetime import datetime +import utilities + + +## Some settings +list_nctrs = [int(np.round(nr)) for nr in np.logspace(np.log(10) / np.log(10), np.log(1000) / np.log(10), 10)] + +basepath = utilities.get_basepath() +path_for_indices = basepath + 'data/' + + +# Main functiono to run kernel optimization with subsequent vkoga +def run_everything(name_dataset, maxIter_vkoga, N_points, noise_level, reg_para_optim, reg_para_vkoga, + learning_rate, n_epochs, batch_size, n_folds, + flag_initialize_diagonal=False, flag_symmetric_A=False, flag_gaussian=True, + flag_vkoga_verbose=False, flag_plots=False, flag_optim_verbose=True, + flag_std_vkoga=True, idx_rerun=None): + + dic_timings = {} + + ## Load dataset + dataset = Dataset(N_points=N_points) + X, y = dataset.get_data(name_dataset) + 
hyperparameter = dic_hyperparams[name_dataset] + + # Preprocessing + if 'example' in name_dataset: + idx = np.arange(X.shape[0]) + else: + assert idx_rerun is not None, 'idx_rerun is not set!' + idx = np.load(path_for_indices + '_indices_{}/indices_'.format(idx_rerun) + name_dataset + '.npy') + + n_train = int(.8 * X.shape[0]) + + X_train = X[idx[:n_train]] + y_train = y[idx[:n_train]] + X_test = X[idx[n_train:]] + y_test = y[idx[n_train:]] + X_train_torch, y_train_torch = torch.from_numpy(X_train).type(torch.float), torch.from_numpy(y_train).type(torch.float) + # ToDo: noise level removed! (was set to 0 so far!) + + + ## Select kernel + if flag_gaussian: + kernel_t = tkernels.Gaussian(ep=hyperparameter.shape_para / np.sqrt(X_train.shape[1])) + kernel = kernels.Gaussian(ep=hyperparameter.shape_para /np.sqrt(X_train.shape[1])) + else: + kernel_t = tkernels.Matern(k=hyperparameter.k_matern, ep=hyperparameter.shape_para / np.sqrt(X_train.shape[1])) + kernel = kernels.Matern(k=hyperparameter.k_matern, ep=hyperparameter.shape_para / np.sqrt(X_train.shape[1])) + + + + ## Optimize kernel model + model = OptimizedKernel(kernel=kernel_t, dim=X_train_torch.shape[1], + reg_para=reg_para_optim, + learning_rate=learning_rate, + n_epochs=n_epochs, + batch_size=batch_size, + n_folds=n_folds, + flag_initialize_diagonal=flag_initialize_diagonal, + flag_symmetric_A=flag_symmetric_A) + + A_start = torch.clone(model.A).detach().numpy() + + t_optim_start = time.time() + print(datetime.now().strftime("%H:%M:%S"), name_dataset, '2layered kernel optimization started.') + model.optimize(X_train_torch, y_train_torch, flag_optim_verbose=flag_optim_verbose) + print(datetime.now().strftime("%H:%M:%S"), name_dataset, '2layered kernel optimization finished.') + t_optim_stop = time.time() + + dic_timings['2L_optim'] = t_optim_stop - t_optim_start + + ## Application of VKOGA + # VKOGA with modified modified kernel, simply pre-apply the linear transformation + model_vkoga1 = VKOGA(kernel=kernel, greedy_type='f_greedy', + verbose=flag_vkoga_verbose, reg_par=reg_para_vkoga) + t_vkoga1_start = time.time() + print(datetime.now().strftime("%H:%M:%S"), name_dataset, '2layered VKOGA started.') + _ = model_vkoga1.fit(X_train @ model.A.detach().numpy(), y_train, + X_val=X_test @ model.A.detach().numpy(), y_val=y_test, + maxIter=maxIter_vkoga) + print(datetime.now().strftime("%H:%M:%S"), name_dataset, '2layered VKOGA finished.') + t_vkoga1_stop = time.time() + + dic_timings['2L_vkoga'] = t_vkoga1_stop - t_vkoga1_start + + # VKOGA with standard kernel + model_vkoga2 = VKOGA(kernel=kernel, greedy_type='f_greedy', + verbose=flag_vkoga_verbose, reg_par=reg_para_vkoga) + t_vkoga2_start = time.time() + print(datetime.now().strftime("%H:%M:%S"), name_dataset, 'Standard VKOGA started.') + if flag_std_vkoga: + _ = model_vkoga2.fit(X_train, y_train, + X_val=X_test, y_val=y_test, + maxIter=maxIter_vkoga) + print(datetime.now().strftime("%H:%M:%S"), name_dataset, 'Standard VKOGA finished.') + else: + print(datetime.now().strftime("%H:%M:%S"), name_dataset, 'Standard VKOGA was skipped.') + + t_vkoga2_stop = time.time() + + + + ## Some analysis: Print timings and error + mse_test_error_vkoga1 = np.linalg.norm(model_vkoga1.predict(X_test @ model.A.detach().numpy()) - y_test) ** 2 / y_test.shape[0] + if flag_std_vkoga: + mse_test_error_vkoga2 = np.linalg.norm(model_vkoga2.predict(X_test) - y_test) ** 2 / y_test.shape[0] + else: + mse_test_error_vkoga2 = np.linalg.norm(y_test) ** 2 / y_test.shape[0] + + + print(name_dataset, 'Deep Kernel 
Optimization {:.3f}s.'.format(t_optim_stop - t_optim_start)) + print(name_dataset, 'Deep VKOGA runtime: {:.3f}s.'.format(t_vkoga1_stop - t_vkoga1_start)) + print(name_dataset, 'Standard VKOGA runtime: {:.3f}s.'.format(t_vkoga2_stop - t_vkoga2_start)) + print(' -----------------------------------------') + print(name_dataset, 'Deep kernel test MSE: {:.3e}'.format(mse_test_error_vkoga1)) + if flag_std_vkoga: + print(name_dataset, 'Standard kernel test MSE: {:.3e}'.format(mse_test_error_vkoga2)) + else: + print(name_dataset, 'Standard kernel test MSE: skipped.') + print(' ') + + ## Compute (intermediate) test rmse + list_nctrs_rmse = list(np.array(list_nctrs)[np.array(list_nctrs) <= maxIter_vkoga]) + array_test_rmse = np.zeros((1, len(list_nctrs_rmse))) + + print(datetime.now().strftime("%H:%M:%S"), name_dataset, 'Started computation of test errors.') + for idx_nctrs, nctrs in enumerate(list_nctrs_rmse): # centers need not to be transformed by matrix A !! + y_test_pred = kernel.eval(X_test @ model.A.detach().numpy(), model_vkoga1.ctrs_[:nctrs]) \ + @ model_vkoga1.Cut_[:nctrs, :nctrs].transpose() @ model_vkoga1.c[:nctrs] + + array_test_rmse[0, idx_nctrs] = \ + np.linalg.norm(y_test - y_test_pred) ** 2 / y_test.shape[0] + print(datetime.now().strftime("%H:%M:%S"), name_dataset, 'Finished computation of test errors.') + + + ## Some visualizations + array_concatenate = np.stack(model.list_parameters, 2) + if flag_plots: + # Change of parameters of linear layer + n_epochs = array_concatenate.shape[-1] + + idx_figure_offset = 0 + plt.figure(idx_figure_offset + 5) + plt.clf() + list_legend = [] + for k in range(array_concatenate.shape[0]): + plt.plot(np.arange(1, n_epochs + 1), array_concatenate[k, k, :]) + list_legend.append('A[{},{}]'.format(k, k)) + plt.plot(np.arange(1, n_epochs + 1), array_concatenate[0, 1, :], '--') + plt.plot(np.arange(1, n_epochs + 1), array_concatenate[1, 0, :], '--') + plt.legend(list_legend + ['A[0,1]', 'A[1,0]']) + plt.title('matrix entries') + plt.xscale('log') + plt.xlabel('iterations') + plt.draw() + + # Decay of optimization objective + plt.figure(idx_figure_offset + 1) + plt.clf() + plt.plot(np.arange(1, len(model.list_obj) + 1), model.list_obj) + plt.xscale('log') + plt.yscale('log') + + + # Decay of vkoga quantities + plt.figure(idx_figure_offset + 2) + plt.clf() + plt.plot(np.arange(1, len(model_vkoga1.train_hist['f']) + 1), model_vkoga1.train_hist['f'], 'm') + plt.plot(np.arange(1, len(model_vkoga2.train_hist['f']) + 1), model_vkoga2.train_hist['f'], 'b') + plt.plot(np.arange(1, len(model_vkoga1.train_hist['f val']) + 1), model_vkoga1.train_hist['f val'], 'm--') + plt.plot(np.arange(1, len(model_vkoga2.train_hist['f val']) + 1), model_vkoga2.train_hist['f val'], 'b--') + plt.plot(np.arange(1, len(model_vkoga1.train_hist['f val']) + 1), model_vkoga1.train_hist['f val'], 'm--') + plt.xscale('log') + plt.yscale('log') + plt.legend(['2 layer kernel', 'standard']) + + # Plot of predictions + plt.figure(idx_figure_offset + 3) + plt.clf() + plt.plot(y_train, 'k') + plt.plot(model_vkoga1.predict(X_train @ model.A.detach().numpy()), 'm') + plt.plot(model_vkoga2.predict(X_train), 'b') + plt.legend(['true', 'deep kernel', 'standard kernel']) + plt.title('Train set predictions') + + plt.figure(idx_figure_offset + 4) + plt.clf() + plt.plot(y_test, 'k') + plt.plot(model_vkoga1.predict(X_test @ model.A.detach().numpy()), 'm') + plt.plot(model_vkoga2.predict(X_test), 'b') + plt.legend(['true', 'deep kernel', 'standard kernel']) + plt.title('Test set predictions') + + 
plt.figure(idx_figure_offset + 40) + plt.clf() + # plt.plot(y_test, 'k') + plt.plot(model_vkoga1.predict(X_test @ model.A.detach().numpy()).reshape(-1) - y_test.reshape(-1), 'm') + plt.plot(model_vkoga2.predict(X_test).reshape(-1) - y_test.reshape(-1), 'b') + plt.legend(['deep kernel', 'standard kernel']) + plt.title('Test set residuals') + + + + data = [X_train, X_test, y_train, y_test] + + return A_start, model.A.detach().numpy(), model, model_vkoga1, model_vkoga2, \ + data, array_concatenate, array_test_rmse, dic_timings + + + +def run_cross_validation(name_dataset, data, maxIter_vkoga, flag_gaussian=True, n_cross_val=5, + flag_vkoga_verbose=False): + + # Unpack data, some settings + [X_train, X_test, y_train, y_test] = data + hyperparameter = dic_hyperparams[name_dataset] + + lower_bound = .05 + upper_bound = 10 + + array_eps = np.logspace(np.log(lower_bound) / np.log(10), + np.log(upper_bound) / np.log(10), n_cross_val) + list_nctrs_rmse = list(np.array(list_nctrs)[np.array(list_nctrs) <= maxIter_vkoga]) + + array_test_rmse = np.zeros((n_cross_val, len(list_nctrs_rmse))) + + # Prepare for computation + list_timings = [] + array_f = np.zeros((maxIter_vkoga, n_cross_val)) + array_f_val = np.zeros((maxIter_vkoga, n_cross_val)) + + for idx_para, para_ep in enumerate(array_eps): + # Select kernel + if flag_gaussian: + kernel = kernels.Gaussian(ep=para_ep * 1 / np.sqrt(X_train.shape[1])) + else: + kernel = kernels.Matern(k=hyperparameter.k_matern, ep=para_ep * 1 / np.sqrt(X_train.shape[1])) + + # VKOGA with standard kernel + model_vkoga = VKOGA(kernel=kernel, greedy_type='f_greedy', verbose=flag_vkoga_verbose) + t_vkoga_start = time.time() + _ = model_vkoga.fit(X_train, y_train, + X_val=X_test, y_val=y_test, + maxIter=maxIter_vkoga) + t_vkoga_stop = time.time() + + + array_f[:, idx_para] = model_vkoga.train_hist['f'] + array_f_val[:, idx_para] = model_vkoga.train_hist['f val'] + + + list_timings.append(t_vkoga_stop - t_vkoga_start) + + print(datetime.now().strftime("%H:%M:%S"), name_dataset, 'kernel_par = {:.2e} run took {:.3f}s.'.format(para_ep, t_vkoga_stop - t_vkoga_start)) + + ## Compute (intermediate) test rmse + print(datetime.now().strftime("%H:%M:%S"), name_dataset, 'Started computation of test errors.') + for idx_nctrs, nctrs in enumerate(list_nctrs_rmse): + y_test_pred = kernel.eval(X_test, model_vkoga.ctrs_[:nctrs]) \ + @ model_vkoga.Cut_[:nctrs, :nctrs].transpose() @ model_vkoga.c[:nctrs] + + array_test_rmse[idx_para, idx_nctrs] = \ + np.linalg.norm(y_test - y_test_pred)**2 / y_test.shape[0] + print(datetime.now().strftime("%H:%M:%S"), name_dataset, 'Finished computation of test errors.') + + return array_eps, array_f, array_f_val, array_test_rmse, list_timings + diff --git a/utils/optimized_kernel.py b/utils/optimized_kernel.py new file mode 100644 index 0000000..423faf4 --- /dev/null +++ b/utils/optimized_kernel.py @@ -0,0 +1,117 @@ +from torch import nn +import torch +from P36_Francesco_Emma.utilities.cv_rippa_ext import compute_cv_loss_via_rippa_ext_2 +import numpy as np + + + + +class OptimizedKernel(torch.nn.Module): + ''' + Class for optimizing a two-layered kernel, i.e. to optimize the first-layer matrix A. 
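+    The trainable parameter is the square matrix A (stored as the nn.Parameter self.B
+    and symmetrized if flag_symmetric_A is set). It defines the two-layered kernel via
+    k_A(x, y) = kernel(x A, y A), i.e. the base kernel is evaluated on the linearly
+    transformed inputs X @ A. A is trained with Adam on minibatches, minimizing a
+    cross-validation loss computed efficiently with Rippa's rule
+    (compute_cv_loss_via_rippa_ext_2) using the regularization parameter reg_para.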
+ ''' + + def __init__(self, kernel, dim, + reg_para=1e-5, learning_rate=1e-3, n_epochs=100, batch_size=32, n_folds=None, + flag_initialize_diagonal=False, flag_symmetric_A=False): + super().__init__() + + # Some settings, mostly optimization related + self.kernel = kernel + + self.dim = dim + self.reg_para = reg_para + self.learning_rate = learning_rate + self.n_epochs = n_epochs + self.batch_size = batch_size + + self.flag_symmetric_A = flag_symmetric_A + + # Define linear maps - hardcoded + if torch.is_tensor(flag_initialize_diagonal): + self.B = nn.Parameter(flag_initialize_diagonal, requires_grad=True) + elif flag_initialize_diagonal: + self.B = nn.Parameter(torch.eye(self.dim, self.dim), requires_grad=True) + else: + self.B = nn.Parameter(torch.rand(self.dim, self.dim), requires_grad=True) + + if self.flag_symmetric_A: + self.A = (self.B + self.B.t()) / 2 + else: + self.A = self.B + + + if n_folds is None: + self.n_folds = self.batch_size + else: + self.n_folds = n_folds + + + # Set optimizer and scheduler + self.optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate) + self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=50, gamma=.7) + + # Initliaze lists from tracking + self.list_obj = [] + self.list_parameters = [] + + + def optimize(self, X, y, flag_optim_verbose=True): + + assert X.shape[0] == y.shape[0], 'Data sizes do not match' + n_batches = X.shape[0] // self.batch_size + + # Append initial parameters + if self.flag_symmetric_A: + self.list_parameters.append(torch.clone((self.B + self.B.t()) / 2).detach().numpy()) + else: + self.list_parameters.append(torch.clone(self.A).detach().numpy()) + + + for idx_epoch in range(self.n_epochs): + shuffle = np.random.permutation(X.shape[0]) # reshuffle the data set every epoch + + list_obj_loc = [] + + for idx_batch in range(n_batches): + + # Select minibatch from the data + ind = shuffle[idx_batch * self.batch_size : (idx_batch + 1) * self.batch_size] + Xb, yb = X[ind, :], y[ind, :] + + # Compute kernel matrix for minibatch + kernel_matrix = self.kernel.eval(Xb @ self.A, Xb @ self.A) + + # use cross validation loss via rippa to assess the error + optimization_objective, _ = compute_cv_loss_via_rippa_ext_2( + kernel_matrix, yb, self.n_folds, self.reg_para) + + # Keep track of optimization quantity within epoch + list_obj_loc.append(optimization_objective.detach().item()) + if idx_epoch == 0 and flag_optim_verbose: + print('First epoch: Iteration {:5d}: Training objective: {:.3e}'.format( + idx_batch, optimization_objective.detach().item())) + + # Do optimization stuff + optimization_objective.backward() + self.optimizer.step() # do one optimization step + self.optimizer.zero_grad() # set gradients to zero + + if self.flag_symmetric_A: + self.A = (self.B + self.B.t()) / 2 + else: + self.A = self.B + + # Keep track of some quantities and print something + mean_obj = np.mean(list_obj_loc) + + if flag_optim_verbose: + print('Epoch {:5d} finished, mean training objective: {:.3e}.'.format( + idx_epoch + 1, mean_obj)) + + self.list_obj.append(mean_obj) + + self.list_parameters.append(torch.clone(self.A).detach().numpy()) + + + diff --git a/utils/tkernels.py b/utils/tkernels.py new file mode 100644 index 0000000..26e2660 --- /dev/null +++ b/utils/tkernels.py @@ -0,0 +1,195 @@ +#!/usr/bin/env python3 + +# Torch implementation of the kernels!! 
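+#
+# Each concrete RBF below only defines a shape parameter ep and a radial profile
+# self.rbf(ep, r); the abstract base class RBF assembles the full kernel matrix as
+# K[i, j] = rbf(ep, ||x_i - y_j||) via torch.cdist. Since everything is expressed in
+# torch operations, gradients can flow through the kernel evaluation, which the
+# first-layer matrix optimization in optimized_kernel.py relies on.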
+# import sys +# sys.path.append('/usr/local/lib/python3.7/dist-packages/') +import torch +from abc import ABC, abstractmethod +import matplotlib.pyplot as plt +import numpy as np + + +# Abstract kernel +class Kernel(ABC): + @abstractmethod + def __init__(self): + super().__init__() + + @abstractmethod + def eval(self): + pass + + @abstractmethod + def diagonal(self, X): + pass + + @abstractmethod + def __str__(self): + pass + + @abstractmethod + def set_params(self, params): + pass + + +# Abstract RBF +class RBF(Kernel): + @abstractmethod + def __init__(self): + super(RBF, self).__init__() + + def eval(self, x, y): + return self.rbf(self.ep, + torch.cdist(x, y)) + + def diagonal(self, X): + return torch.ones(X.shape[0], 1) * self.rbf(self.ep, torch.tensor(0.0)) + + def __str__(self): + return self.name + ' [gamma = %2.2e]' % self.ep + + def set_params(self, par): + self.ep = par + + +# Implementation of concrete RBFs +class Gaussian(RBF): + def __init__(self, ep=1): + self.ep = ep + self.name = 'gauss' + self.rbf = lambda ep, r: torch.exp(-(ep * r) ** 2) + + +class GaussianTanh(RBF): + def __init__(self, ep=1): + self.ep = ep + self.name = 'gauss_tanh' + self.rbf = lambda ep, r: torch.exp(-(ep * torch.tanh(r)) ** 2) + + +class IMQ(RBF): + def __init__(self, ep=1): + self.ep = ep + self.name = 'imq' + self.rbf = lambda ep, r: 1. / torch.sqrt(1 + (ep * r) ** 2) + + +class Matern(RBF): + def __init__(self, ep=1, k=0): + self.ep = ep + if k == 0: + self.name = 'mat0' + self.rbf = lambda ep, r: torch.exp(-ep * r) + elif k == 1: + self.name = 'mat1' + self.rbf = lambda ep, r: torch.exp(-ep * r) * (1 + ep * r) + elif k == 2: + self.name = 'mat2' + self.rbf = lambda ep, r: torch.exp(-ep * r) * (3 + 3 * ep * r + (ep * r) ** 2) + elif k == 3: + self.name = 'mat3' + self.rbf = lambda ep, r: torch.exp(-ep * r) * (15 + 15 * ep * r + 6 * (ep * r) ** 2 + 1 * (ep * r) ** 3) + elif k == 4: + self.name = 'mat4' + self.rbf = lambda ep, r: torch.exp(-ep * r) * ( + 105 + 105 * ep * r + 45 * (ep * r) ** 2 + 10 * (ep * r) ** 3 + 1 * (ep * r) ** 4) + elif k == 5: + self.name = 'mat5' + self.rbf = lambda ep, r: torch.exp(-ep * r) * ( + 945 + 945 * ep * r + 420 * (ep * r) ** 2 + 105 * (ep * r) ** 3 + 15 * (ep * r) ** 4 + 1 * ( + ep * r) ** 5) + elif k == 6: + self.name = 'mat6' + self.rbf = lambda ep, r: torch.exp(-ep * r) * ( + 10395 + 10395 * ep * r + 4725 * (ep * r) ** 2 + 1260 * (ep * r) ** 3 + 210 * ( + ep * r) ** 4 + 21 * (ep * r) ** 5 + 1 * (ep * r) ** 6) + elif k == 7: + self.name = 'mat7' + self.rbf = lambda ep, r: torch.exp(-ep * r) * ( + 135135 + 135135 * ep * r + 62370 * (ep * r) ** 2 + 17325 * (ep * r) ** 3 + 3150 * ( + ep * r) ** 4 + 378 * (ep * r) ** 5 + 28 * (ep * r) ** 6 + 1 * (ep * r) ** 7) + else: + self.name = None + self.rbf = None + raise Exception('This Matern kernel is not implemented') + + +class HatFunction(RBF): + def __init__(self, ep=1): + self.ep = ep + self.rbf = lambda ep, r: torch.clamp(1 - ep*r, min=0) + self.name = 'Hat function' + + +class PowerKernels(RBF): + def __init__(self, ep=1): + self.ep = ep + self.rbf = lambda ep, r: torch.abs(r) + self.name = 'Power kernel' + + +class Wendland(RBF): + def __init__(self, ep=1, k=0, d=1): + self.ep = ep + self.name = 'wen_' + str(d) + '_' + str(k) + l = np.floor(d / 2) + k + 1 + if k == 0: + p = lambda r: 1 + elif k == 1: + p = lambda r: (l + 1) * r + 1 + elif k == 2: + p = lambda r: (l + 3) * (l + 1) * r ** 2 + 3 * (l + 2) * r + 3 + elif k == 3: + p = lambda r: (l + 5) * (l + 3) * (l + 1) * r ** 3 + (45 + 6 * l * (l + 6)) * r ** 2 + ( 
+ 15 * (l + 3)) * r + 15 + elif k == 4: + p = lambda r: (l + 7) * (l + 5) * (l + 3) * (l + 1) * r ** 4 + ( + 5 * (l + 4) * (21 + 2 * l * (8 + l))) * r ** 3 + (45 * (14 + l * (l + 8))) * r ** 2 + ( + 105 * (l + 4)) * r + 105 + else: + raise Exception('This Wendland kernel is not implemented') + c = np.math.factorial(l + 2 * k) / np.math.factorial(l) + e = l + k + # self.rbf = lambda ep, r: np.maximum(1 - ep * r, 0) ** e * p(ep * r) / c + self.rbf = lambda ep, r: torch.clamp(1 - ep * r, min=0) ** e * p(ep * r) / c + + +# # Polynomial kernels +# class Polynomial(Kernel): +# def __init__(self, a=0, p=1): +# self.a = a +# self.p = p +# +# def eval(self, x, y): +# return (np.atleast_2d(x) @ np.atleast_2d(y).transpose() + self.a) ** self.p +# +# def diagonal(self, X): +# return ((np.linalg.norm(X, axis=1) + self.a) ** self.p)[:, None] +# +# def __str__(self): +# return 'polynomial' + ' [a = %2.2e, p = %2.2e]' % (self.a, self.p) +# +# def set_params(self, par): +# self.a = par[0] +# self.p = par[1] + +# A demo usage +def main(): + ker = Gaussian() + + x = torch.linspace(-5, 5, 10001)[:, None] + y = torch.zeros(1, 1) + A = ker.eval(x, y) + B = torch.exp(-x ** 2) + + fig = plt.figure(1) + fig.clf() + ax = fig.gca() + ax.plot(x, A) + ax.plot(x, B) + ax.set_title('A kernel: ' + str(ker)) + plt.show() + + +if __name__ == '__main__': + main() diff --git a/utils/vkoga.py b/utils/vkoga.py new file mode 100644 index 0000000..1731605 --- /dev/null +++ b/utils/vkoga.py @@ -0,0 +1,354 @@ +#!/usr/bin/env python3 + +from vkoga.kernels import Gaussian +import numpy as np +from sklearn.base import BaseEstimator +from sklearn.utils.validation import check_X_y, check_array, check_is_fitted +from scipy.spatial import distance_matrix + +# VKOGA implementation +class VKOGA(BaseEstimator): + + def __init__(self, kernel=Gaussian(), kernel_par=1, + verbose=True, n_report=10, + greedy_type='p_greedy', reg_par=0, restr_par=0, + tol_f=1e-10, tol_p=1e-10): + + # Set the verbosity on/off + self.verbose = verbose + + # Set the frequency of report + self.n_report = n_report + + # Set the params defining the method + self.kernel = kernel + self.kernel_par = kernel_par + self.greedy_type = greedy_type + self.reg_par = reg_par + self.restr_par = restr_par + + self.flag_val = None + + # Set the stopping values + self.tol_f = tol_f + self.tol_p = tol_p + + # Some further settings + self.ctrs_ = None + self.Cut_ = None + self.c = None + + + # Initialize the convergence history + self.train_hist = {} + self.train_hist['n'] = [] + self.train_hist['f'] = [] + self.train_hist['p'] = [] + self.train_hist['p selected'] = [] # list of selected power vals + self.train_hist['f val'] = [] + self.train_hist['p val'] = [] + + def selection_rule(self, f, p): + if self.restr_par > 0: + p_ = np.max(p) + restr_idx = np.nonzero(p >= self.restr_par * p_)[0] + else: + restr_idx = np.arange(len(p)) + + f = np.sum(f ** 2, axis=1) + if self.greedy_type == 'f_greedy': + idx = np.argmax(f[restr_idx]) + idx = restr_idx[idx] + f_max = np.max(f) + p_max = np.max(p) + elif self.greedy_type == 'fp_greedy': + idx = np.argmax(f[restr_idx] / p[restr_idx]) + idx = restr_idx[idx] + f_max = np.max(f) + p_max = np.max(p) + elif self.greedy_type == 'p_greedy': + f_max = np.max(f) + idx = np.argmax(p) + p_max = p[idx] + elif self.greedy_type == 'rand': # pick some random point - David Holzmüller asked me about its performance + f_max = np.max(f) + p_max = np.max(p) + idx = np.random.randint(len(p)) + return idx, f_max, p_max + + def fit(self, X, y, X_val=None, 
y_val=None, maxIter=None): + # Check the datasets + X, y = check_X_y(X, y, multi_output=True) + if len(y.shape) == 1: + y = y[:, None] + + if X_val is None or y_val is None: + X_val = None + y_val = None + self.flag_val = False + else: + self.flag_val = True + X_val, y_val = check_X_y(X_val, y_val, multi_output=True) + # We will assume in the following that X_val and X are disjoint + + if len(y_val.shape) == 1: + y_val = y_val[:, None] + + # Check whether already fitted + if self.ctrs_ is None: + self.ctrs_ = np.zeros((0, X.shape[1])) + self.Cut_ = np.zeros((0, 0)) + self.c = np.zeros((0, y.shape[1])) + + + # Check whether "new X" and previously chosen centers overlap + list_truly_new_X = [] + if self.ctrs_.shape[0] > 0: + for idx_x in range(X.shape[0]): + if min(np.linalg.norm(self.ctrs_ - X[idx_x, :], axis=1)) < 1e-10: + continue + else: + list_truly_new_X.append(idx_x) + else: + list_truly_new_X = list(range(X.shape[0])) + X = X[list_truly_new_X, :] + y = y[list_truly_new_X, :] + + + + # Initialize the residual and update the given y values by substracting the current model + y = np.array(y) + if len(y.shape) == 1: + y = y[:, None] + y = y - self.predict(X) + if self.flag_val: + y_val = y_val - self.predict(X_val) + + + # Get the data dimension + N, q = y.shape + if self.flag_val: + N_val = y_val.shape[0] + + + # Set maxIter_continue + if maxIter is None or maxIter > N: + self.maxIter = 100 + else: + self.maxIter = maxIter + + + # Check compatibility of restriction + if self.greedy_type == 'p_greedy': + self.restr_par = 0 + if not self.reg_par == 0: + self.restr_par = 0 + + + # Initialize list for the chosen and non-chosen indices + indI_ = [] + notIndI = list(range(N)) + c = np.zeros((self.maxIter, q)) + + + # Compute the Newton basis values (related to the old centers) on the new X + Vx_new_X_old_ctrs = self.kernel.eval(X, self.ctrs_) @ self.Cut_.transpose() + if self.flag_val: + Vx_val_new_X_old_ctrs = self.kernel.eval(X_val, self.ctrs_) @ self.Cut_.transpose() + + + # Initialize arrays for the Newton basis values (related to the new centers) on the new X + Vx = np.zeros((N, self.maxIter)) + if self.flag_val: + Vx_val = np.zeros((N_val, self.maxIter)) + + + # Compute the powervals on X and X_val + p = self.kernel.diagonal(X) + self.reg_par + p = p - np.sum(Vx_new_X_old_ctrs ** 2, axis=1) + if self.flag_val: + p_val = self.kernel.diagonal(X_val) + self.reg_par + p_val = p_val - np.sum(Vx_val_new_X_old_ctrs ** 2, axis=1) + + + # Extend Cut_ matrix, i.e. 
continue to build on old self.Cut_ matrix + N_ctrs_so_far = self.Cut_.shape[0] + Cut_ = np.zeros((N_ctrs_so_far + self.maxIter, N_ctrs_so_far + self.maxIter)) + Cut_[:N_ctrs_so_far, :N_ctrs_so_far] = self.Cut_ + + + # Iterative selection of new points + self.print_message('begin') + for n in range(self.maxIter): + # prepare + self.train_hist['n'].append(self.ctrs_.shape[0] + n + 1) + self.train_hist['f'].append([]) + self.train_hist['p'].append([]) + self.train_hist['p selected'].append([]) + if self.flag_val: + self.train_hist['p val'].append([]) + self.train_hist['f val'].append([]) + + # select the current index + idx, self.train_hist['f'][-1], self.train_hist['p'][-1] = self.selection_rule(y[notIndI], p[notIndI]) + self.train_hist['p selected'][-1] = p[notIndI[idx]] + if self.flag_val: + self.train_hist['p val'][-1] = np.max(p_val) + self.train_hist['f val'][-1] = np.max(np.sum(y_val ** 2, axis=1)) + + # add the current index + indI_.append(notIndI[idx]) + + # check if the tolerances are reacheded + if self.train_hist['f'][n] <= self.tol_f: + n = n - 1 + self.print_message('end') + break + if self.train_hist['p'][n] <= self.tol_p: + n = n - 1 + self.print_message('end') + break + + # compute the nth basis (including normalization)# ToDo: Also old Vx need to be substracted here! + Vx[notIndI, n] = self.kernel.eval(X[notIndI, :], X[indI_[n], :])[:, 0]\ + - Vx_new_X_old_ctrs[notIndI, :] @ Vx_new_X_old_ctrs[indI_[n], :].transpose()\ + - Vx[notIndI, :n+1] @ Vx[indI_[n], 0:n+1].transpose() + Vx[indI_[n], n] += self.reg_par + Vx[notIndI, n] = Vx[notIndI, n] / np.sqrt(p[indI_[n]]) + + if self.flag_val: + Vx_val[:, n] = self.kernel.eval(X_val, X[indI_[n], :])[:, 0]\ + - Vx_val_new_X_old_ctrs[:, :] @ Vx_new_X_old_ctrs[indI_[n], :].transpose()\ + - Vx_val[:, :n+1] @ Vx[indI_[n], 0:n+1].transpose() + Vx_val[:, n] = Vx_val[:, n] / np.sqrt(p[indI_[n]]) + + + # update the change of basis + Cut_new_row = np.ones(N_ctrs_so_far + n + 1) + Cut_new_row[:N_ctrs_so_far + n] = \ + -np.concatenate((Vx_new_X_old_ctrs[indI_[n], :], Vx[indI_[n], :n])) \ + @ Cut_[:N_ctrs_so_far + n, :N_ctrs_so_far + n] + Cut_[N_ctrs_so_far + n, :N_ctrs_so_far + n + 1] = Cut_new_row / Vx[indI_[n], n] + + # compute the nth coefficient + c[n] = y[indI_[n]] / np.sqrt(p[indI_[n]]) + + # update the power function + p[notIndI] = p[notIndI] - Vx[notIndI, n] ** 2 + if self.flag_val: + p_val = p_val - Vx_val[:, n] ** 2 + + # update the residual + y[notIndI] = y[notIndI] - Vx[notIndI, n][:, None] * c[n] + if self.flag_val: + y_val = y_val - Vx_val[:, n][:, None] * c[n] + + # remove the nth index from the dictionary + notIndI.pop(idx) + + # Report some data every now and then + if n % self.n_report == 0: + self.print_message('track') + + else: + self.print_message('end') + + # Define coefficients and centers + self.c = np.concatenate((self.c, c[:n + 1])) + self.Cut_ = Cut_[:N_ctrs_so_far + n + 1, :N_ctrs_so_far + n + 1] + self.indI_ = indI_[:n + 1] # Mind: These are only the indices of the latest points + self.coef_ = self.Cut_.transpose() @ self.c + self.ctrs_ = np.concatenate((self.ctrs_, X[self.indI_, :]), axis=0) + + + return self + + + def predict(self, X): + # Check is fit has been called + # check_is_fitted(self, 'coef_') # ToDo: Remove this one! 
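+        # The fitted model is s(X) = kernel.eval(X, ctrs_) @ coef_, where
+        # coef_ = Cut_^T @ c combines the change-of-basis matrix Cut_ with the
+        # Newton-basis coefficients c accumulated during fit().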
+ + # Validate the input + X = check_array(X) + + # Compute prediction + if self.ctrs_.shape[0] > 0: + prediction = self.kernel.eval(X, self.ctrs_) @ self.coef_ + else: + prediction = np.zeros((X.shape[0], 1)) + + return prediction + ### TODO: replace with eval prod + + def predict_P(self, X, n=None): + # Prediction of the power function, copied from pgreedy.py + + # Try to do nothing + if self.ctrs_ is None or n == 0: + return self.kernel.diagonal(X) + + # Otherwise check if everything is ok + # Check is fit has been called + check_is_fitted(self, 'ctrs_') + # Validate the input + X = check_array(X) + + # Decide how many centers to use + if n is None: + n = np.atleast_2d(self.ctrs_).shape[0] + + # Evaluate the power function on the input + p = self.kernel.diagonal(X) - np.sum( + (self.kernel.eval(X, np.atleast_2d(self.ctrs_)[:n]) @ self.Cut_[:n, :n].transpose()) ** 2, axis=1) + + return p + + def print_message(self, when): + if self.verbose and when == 'begin': + print('') + print('*' * 30 + ' [VKOGA] ' + '*' * 30) + print('Training model with') + print(' |_ kernel : %s' % self.kernel) + print(' |_ regularization par. : %2.2e' % self.reg_par) + print(' |_ restriction par. : %2.2e' % self.restr_par) + print('') + + if self.verbose and when == 'end': + print('Training completed with') + print(' |_ selected points : %8d / %8d' % (self.train_hist['n'][-1], self.ctrs_.shape[0] + self.maxIter)) + if self.flag_val: + print(' |_ train, val residual : %2.2e / %2.2e, %2.2e' % + (self.train_hist['f'][-1], self.tol_f, self.train_hist['f val'][-1])) + print(' |_ train, val power fun: %2.2e / %2.2e, %2.2e' % + (self.train_hist['p'][-1], self.tol_p, self.train_hist['p val'][-1])) + else: + print(' |_ train residual : %2.2e / %2.2e' % (self.train_hist['f'][-1], self.tol_f)) + print(' |_ train power fun : %2.2e / %2.2e' % (self.train_hist['p'][-1], self.tol_p)) + + if self.verbose and when == 'track': + print('Training ongoing with') + print(' |_ selected points : %8d / %8d' % (self.train_hist['n'][-1], self.ctrs_.shape[0] + self.maxIter)) + if self.flag_val: + print(' |_ train, val residual : %2.2e / %2.2e, %2.2e' % + (self.train_hist['f'][-1], self.tol_f, self.train_hist['f val'][-1])) + print(' |_ train, val power fun: %2.2e / %2.2e, %2.2e' % + (self.train_hist['p'][-1], self.tol_p, self.train_hist['p val'][-1])) + else: + print(' |_ train residual : %2.2e / %2.2e' % (self.train_hist['f'][-1], self.tol_f)) + print(' |_ train power fun : %2.2e / %2.2e' % (self.train_hist['p'][-1], self.tol_p)) + + + +#%% Utilities to +import pickle +def save_object(obj, filename): + with open(filename, 'wb') as output: + pickle.dump(obj, output, pickle.HIGHEST_PROTOCOL) + +def load_object(filename): + with open(filename, 'rb') as input: + obj = pickle.load(input) + return obj + + + -- GitLab
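Usage note: a minimal, illustrative sketch of how the VKOGA class added in utils/vkoga.py can be driven, assuming the package layout used throughout this patch (from vkoga.vkoga import VKOGA, Gaussian from vkoga.kernels); the synthetic data, shape parameter, and maxIter are made-up example values, not settings from the experiments.

import numpy as np
from vkoga.kernels import Gaussian
from vkoga.vkoga import VKOGA

# Synthetic 2D regression data (illustrative only)
rng = np.random.default_rng(0)
X = rng.uniform(size=(500, 2))
y = np.sin(4 * X[:, [0]]) * X[:, [1]]

# f-greedy selection of at most 50 centers with a Gaussian kernel
model = VKOGA(kernel=Gaussian(ep=1.0), greedy_type='f_greedy', verbose=False)
model.fit(X, y, maxIter=50)

y_pred = model.predict(X)
print('train MSE: {:.3e}'.format(np.mean((y_pred - y) ** 2)))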