diff --git a/data/README.md b/data/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/results_5reruns/README.md b/results_5reruns/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..7b7192357011922b2db286a1c3af3998d6bc2e96
--- /dev/null
+++ b/results_5reruns/README.md
@@ -0,0 +1 @@
+Folder to store results.
diff --git a/results_stability/README.md b/results_stability/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/section_4.1_compute_visualize.py b/section_4.1_compute_visualize.py
index 3f6dcfd2ea462efd588586c91d129fadd59cac27..08af491fc61827b5756be3b9d94abba8edda5cf8 100644
--- a/section_4.1_compute_visualize.py
+++ b/section_4.1_compute_visualize.py
@@ -5,8 +5,8 @@
 import torch
-from P36_Francesco_Emma.utilities.main_function import run_everything, run_cross_validation
-from P36_Francesco_Emma.utilities.hyperparameters import dic_hyperparams
+from utils.main_function import run_everything, run_cross_validation
+from utils.hyperparameters import dic_hyperparams
 
 import numpy as np
 from matplotlib import pyplot as plt
@@ -46,7 +46,8 @@ array_eps, array_cv_f, array_cv_f_val, _, list_timings_1L = run_cross_validation
 
 ## Store in matlab for beautiful tikzfigure plots
-path_for_results = os.getcwd() + '/P36_Francesco_Emma/paper_experiments/results/'
+path_for_results = os.path.abspath(os.path.join(os.path.dirname(__file__), 'results_5reruns')) + '/'
+os.makedirs(path_for_results, exist_ok=True)
 
 io.savemat(path_for_results + name_dataset + '.mat',
            dict(array_concatenate=array_concatenate,
diff --git a/section_4.2_compute.py b/section_4.2_compute.py
index 150836e0c5d5269a0b738ca14a38f5db5b4385f3..c60aa4a4bf64bfb9a24b52210598f90a8a62af64 100644
--- a/section_4.2_compute.py
+++ b/section_4.2_compute.py
@@ -4,12 +4,9 @@
 # sets", especially to produce the results for Figures 3-5.
-import torch
-from P36_Francesco_Emma.utilities.main_function import run_everything, run_cross_validation
-from P36_Francesco_Emma.utilities.hyperparameters import dic_hyperparams
+from utils.main_function import run_everything, run_cross_validation
+from utils.hyperparameters import dic_hyperparams
 
-import numpy as np
-from matplotlib import pyplot as plt
 from scipy import io
 import os
@@ -54,7 +51,8 @@ for idx_indices in [0, 1, 2, 3, 4]:
 
     ## Store in matlab for beautiful tikzfigure plots
-    path_for_results = os.getcwd() + '/P36_Francesco_Emma/paper_experiments/results/'
+    path_for_results = os.path.abspath(os.path.join(os.path.dirname(__file__), 'results_5reruns')) + '/'
+    os.makedirs(path_for_results, exist_ok=True)
 
     io.savemat(path_for_results + name_dataset + '_{}'.format(idx_indices) + '.mat',
                dict(array_concatenate=array_concatenate,
diff --git a/section_4.2_visualize.py b/section_4.2_visualize.py
index d286d581f736bc5208317f6116b92894a3f1ada9..1b63fa051db2a68ca3ecc169eb7af71323847403 100644
--- a/section_4.2_visualize.py
+++ b/section_4.2_visualize.py
@@ -9,7 +9,6 @@ from matplotlib import pyplot as plt
 from scipy import io
 import os
 import scipy
-import utilities
 
 
 ## Some settings
@@ -18,8 +17,7 @@ list_datasets = ['fried', 'sarcos', 'protein', 'ct', 'diamonds', 'wecs',
                  'mlr_knn_rng', 'query_agg_count', 'sgemm', 'road_network',
                  'methane', 'poker']  #, 'susy', 'higgs']
-basepath = utilities.get_basepath()
-path_for_results = basepath + 'P36_Francesco_Emma/paper_experiments/results_5reruns/'
+path_for_results = os.path.abspath(os.path.join(os.path.dirname(__file__), 'results_5reruns')) + '/'
 
 
 ## Initialize dictionaries to store several quantities
diff --git a/section_4.3_compute.py b/section_4.3_compute.py
index 74e6a3bfcc492b8b2fabb9bb16e895032d22911a..eeed37947103120675a0ebc2a951caaafaeb76ac 100644
--- a/section_4.3_compute.py
+++ b/section_4.3_compute.py
@@ -3,25 +3,21 @@
 # Code related to the numerical experiment within Section 4.3. "Stability of the kernel
 # optimization", especially to produce the results for Figure 6.
-# Similar to 03_4_stability_investigations.py, but now also running the VKOGA after the kernel optimization.
-# I ran this file on ic6, ic7, ic8 to compute the results which are collected in the folder results_stability.
 
 import os
 import numpy as np
 from scipy.stats import ortho_group  # Requires version 0.18 of scipy
 import pickle
-import utilities
 
-from P36_Francesco_Emma.utilities.dataset_collection import Dataset
-from P36_Francesco_Emma.utilities.hyperparameters import dic_hyperparams
-from P36_Francesco_Emma.utilities.main_function import run_everything
+from utils.dataset_collection import Dataset
+from utils.hyperparameters import dic_hyperparams
+from utils.main_function import run_everything
 
 
 ## Some settings
 list_nctrs = [int(np.round(nr)) for nr in np.logspace(np.log(10) / np.log(10), np.log(1000) / np.log(10), 10)]
 
-basepath = utilities.get_basepath()
-path_for_results = basepath + 'P36_Francesco_Emma/paper_experiments/results_stability/'
+path_for_results = os.path.abspath(os.path.join(os.path.dirname(__file__), 'results_stability')) + '/'
 
 flag_gaussian = False
diff --git a/section_4.3_visualize.py b/section_4.3_visualize.py
index 2d2ce7d26ed5c6bda9807e1e45b0a79f7e12264c..8eb70a6eeaf2bc889ea2c1b3ef4530eb12a925c3 100644
--- a/section_4.3_visualize.py
+++ b/section_4.3_visualize.py
@@ -3,9 +3,6 @@
 # Code related to the numerical experiment within Section 4.3. "Stability of the kernel
 # optimization", especially to produce the plots for Figure 6.
-# Evaluation and analysis of the results which are obtained from 04_1_stability_investigations.py.
-# Only the wecs dataset is a bit weird: Here I do not understand what is going on:
-# Only the first singular vectors are aligned, the other ones are not!
 
 import os
 from scipy import io
@@ -14,14 +11,12 @@
 from scipy.stats import ortho_group  # Requires version 0.18 of scipy
 from matplotlib import pyplot as plt
 import pickle
 import scipy
-import utilities
 
 
 ## Some settings
 list_nctrs = [int(np.round(nr)) for nr in np.logspace(np.log(10) / np.log(10), np.log(1000) / np.log(10), 10)]
 
-basepath = utilities.get_basepath()
-path_for_results = basepath + 'P36_Francesco_Emma/paper_experiments/results_stability/'
+path_for_results = os.path.abspath(os.path.join(os.path.dirname(__file__), 'results_stability')) + '/'
 
 
 ## First, collect results from all files in a common dictionary
@@ -167,81 +162,3 @@ for idx_dataset, name_dataset in enumerate(list(dic_results.keys())):
             break
 
-
-# =============================================================================
-# ============ The following code is meant for some investigations ============
-# =============================================================================
-
-
-
-## Single plot: Compute the distances between the largest eigenspaces. We use nfolds = 64 as reference!
-max_dim = 15  # maximum dimension to which we want to check similarity
-name_dataset = 'wecs'
-
-nfold_ref = 64  # we will use this as a reference model!
-list_idx = [0, 1, 2, 3, 4]
-
-for idx_index in list_idx:
-
-    plt.figure(110 + idx_index)
-    plt.clf()
-    list_legend = []
-
-    for nfold in dic_results[name_dataset][idx_index]:
-        if nfold == nfold_ref:
-            continue  # this is our reference model
-
-        list_similarity = []
-        for dim in range(max_dim):
-            array_angles = scipy.linalg.subspace_angles(dic_singvecs[name_dataset][idx_index][nfold][:, :(dim + 1)],
-                                                        dic_singvecs[name_dataset][idx_index][nfold_ref][:, :(dim + 1)])
-
-            # Maybe modify this measure here? 90 degress means orthogonality!!
- similarity = array_angles[0] - - list_similarity.append(np.rad2deg(similarity)) - - plt.plot(list(range(max_dim)), list_similarity) - list_legend.append('{} vs {}'.format(nfold_ref, nfold)) - - # energy_captured = np.sum(np.abs(dic_singvals[idx_index][idx_nfold][-10:])) / np.sum(np.abs(dic_singvals[idx_index][idx_nfold])) - # plt.title(name_dataset + ': energy captured within {} dims: {:.3f}%'.format(max_dim, 100*energy_captured)) - plt.title(name_dataset + '(dim={}): Alignment of subspaces'.format(dic_singvals[name_dataset][idx_index][nfold].shape[0])) - plt.legend(list_legend) - - -## Single plot: Plot the first few singular vectors -dic_colors = {0: 'k', 4: 'm', 8: 'g', 16: 'r', 32: 'b', 64: 'y'} -max_vectors = 3 # maximum dimension to which we want to check similarity -name_dataset = 'wecs' - -list_idx = [0, 1, 2, 3, 4] - -for idx_vector in range(max_vectors): - - plt.figure(1000 + idx_vector) - plt.clf() - list_legend = [] - - flag_legend = True - for idx_index in list_idx: - - for nfold in dic_results[name_dataset][idx_index]: - - if flag_legend: - list_legend.append(nfold) - - # ToDo: Flip if necessary - plt.plot(dic_singvecs[name_dataset][idx_index][nfold][:, idx_vector], color=dic_colors[nfold]) - - - flag_legend = False - - - # energy_captured = np.sum(np.abs(dic_singvals[idx_index][idx_nfold][-10:])) / np.sum(np.abs(dic_singvals[idx_index][idx_nfold])) - # plt.title(name_dataset + ': energy captured within {} dims: {:.3f}%'.format(max_dim, 100*energy_captured)) - plt.title(name_dataset + '(dim={}): Plot of singular vectors'.format(dic_singvals[name_dataset][idx_index][nfold].shape[0])) - plt.legend(list_legend) - - - diff --git a/utils/dataset_collection.py b/utils/dataset_collection.py index 58a1fab991874b512e135ae841e90d794d0f1361..65b849cbb6abc3988fc5ef1bc92bff7890bbcec3 100644 --- a/utils/dataset_collection.py +++ b/utils/dataset_collection.py @@ -1,11 +1,5 @@ import numpy as np -import pandas as pd import os -import h5py -import math -import scipy.io as spio -from P32_just_interpolate.utilities.utils import load_mnist_pair, get_basepath -import utilities class Dataset(): """ @@ -19,27 +13,10 @@ class Dataset(): self.y = None self.dic_dataset = { - 'example_2d': (lambda x: .02 * (x[:, [0]] + x[:, [1]]) ** 2 + np.sin(2 * math.pi * (x[:, [0]] - x[:, [1]])), 2), - 'example_2d_tiz': (lambda x: x[:, [0]] + (x[:, [0]] + .1 * x[:, [1]]) ** 2, 2), - 'example_2d_radial': (lambda x: np.exp(-4 * np.sum((x - .5 * np.ones_like(x)) ** 2, axis=1, keepdims=True)), 2), - 'example_2d_active_1': (lambda x: np.abs(x[:, [0]] - 2*x[:, [1]]), 2), - 'example_2d_franke': (lambda x: 0.75 * np.exp(-(9 * x[:, [0]] - 2) ** 2 / 4 - (9 * x[:, [1]]- 2) ** 2 / 4) - + 0.75 * np.exp(-(9 * x[:, [0]] + 1) ** 2 / 49 - (9 * x[:, [1]] + 1) / 10) - + 0.5 * np.exp(-(9 * x[:, [0]] - 7) ** 2 / 4 - (9 * x[:, [1]] - 3) ** 2 / 4) - - 0.2 * np.exp(-(9 * x[:, [0]] - 4) ** 2 - (9 * x[:, [1]] - 7) ** 2), 2), - 'example_3d_active_1': (lambda x: x[:, [0]] + 2 * x[:, [1]], 3), - 'example_3d_active_2': (lambda x: (x[:, [0]] + x[:, [1]]) ** 2 + np.sin(2 * math.pi * (x[:, [0]] - x[:, [1]])), 3), - 'example_10d_active': (lambda x: x[:, [0]] * (x[:, [1]] - x[:, [2]]) ** 3 + 2 * np.sin(math.pi * (x[:, [1]] - x[:, [2]])) - np.exp(-2 * x[:, [2]]), 10), - 'example_10d_vanishing': (lambda x: x @ ((np.arange(10) + 1.0) ** (-2)).reshape(-1, 1), 10), - 'example_10d_radial': (lambda x: np.exp(-4 * np.sum((x[:, :5] - .5 * np.ones_like(x[:, :5])) ** 2, axis=1, keepdims=True)), 10), - 'example_5d_radial': (lambda x: np.exp(-4 * np.sum((x[:, 
:] - .5 * np.ones_like(x[:, :])) ** 2, axis=1, keepdims=True)), 5),
-            'example_5d_active': (lambda x: (x[:, [1]] - x[:, [2]]), 5),
             'example_5d_faster_conv': (lambda x: np.exp(-4 * np.sum(x[:, :] - .5 * np.ones_like(x[:, :]), axis=1, keepdims=True) ** 2), 5),
             'example_6d_kink': (lambda x: (np.exp(-4 * np.sum((x[:, :] - .5 * np.ones_like(x[:, :])) ** 2, axis=1, keepdims=True)) + 2 * np.abs(x[:, [0]] - .5)), 6),
             'example_7d_semiactive': (lambda x: np.exp(-4 * np.sum((x[:, :] - .5 * np.ones_like(x[:, :])) ** 2, axis=1, keepdims=True)) + np.exp(-9 * np.sum((x[:, :2] - .3 * np.ones_like(x[:, :2])) ** 2, axis=1, keepdims=True)), 7),
-            # 'example_winkle': (lambda x: ((np.sin(x[:, [0]]) * np.exp(x[:, [1]]) + x[:, [2]]) ** ((x[:, [3]] + 1) ** 2))
-            #                    / ((x[:, [3]] + 1) ** 2), 5),
         }
@@ -52,141 +29,24 @@
             X = np.random.rand(self.N_points, dim)
             y = fcn(X)
 
-
-        elif name_dataset == 'example_2d_clusters':
-            X, y = self.example_2d_clusters()
-        elif name_dataset[:24] == 'example_highdim_clusters':
-            dim = int(name_dataset.split('_')[-1])
-            X, y = self.example_highdim_clusters(dim)
-        elif name_dataset[:10] == 'toyexample':
-            X, y = self.toyexample(name_dataset)
+        elif name_dataset in ['ct', 'diamonds', 'fried', 'kegg_undir_uci',
+                              'methane', 'mlr_knn_rng', 'online_video', 'poker',
+                              'protein', 'query_agg_count', 'road_network', 'sarcos', 'sgemm',
+                              'stock', 'wecs']:
+            X, y = self.example_holzmuller(name_dataset)
         else:
-            if name_dataset[:13] == 'example_mnist':
-                X, y = self.example_mnist(name_dataset)
-            elif name_dataset == 'example_TUD':
-                X, y = self.example_TUD()
-            elif name_dataset in ['ct', 'diamonds', 'fried', 'kegg_undir_uci',
-                                  'methane', 'mlr_knn_rng', 'online_video', 'poker',
-                                  'protein', 'query_agg_count', 'road_network', 'sarcos', 'sgemm',
-                                  'stock', 'wecs']:
-                X, y = self.example_holzmuller(name_dataset)
-            elif name_dataset in ['flights', 'higgs', 'susy', 'taxi', 'timit']:
-                X, y = self.falkon_datasets(name_dataset)
-            elif name_dataset in ['uci_airfoil_self_noise', 'uci_CCPP']:
-                X, y = self.example_uci(name_dataset)
+            raise ValueError('Unknown dataset: {}'.format(name_dataset))
 
         assert X is not None, 'Bug in get_data!'
         assert y is not None, 'Bug in get_data!'
 
         return X, y
 
-    def example_2d_clusters(self):
-
-        X1 = np.random.randn(100, 2)
-        X2 = .1 * np.random.randn(500, 2) + np.array([.5, .5])
-        X3 = .2 * np.random.randn(500, 2) + np.array([-.5, -.5])
-
-        f_func = lambda x: 1 / (1 + np.abs(x[:, 0] - .5))
-
-        X = np.concatenate([X1, X2, X3], axis=0)
-        y = f_func(X)
-
-        return X, y
-
-    def example_highdim_clusters(self, dim):
-        # In the meeting with Antoine and Giacomo we cam up with a hypothesis when greedy works better.
-
-        # X1 = np.random.rand(1000, dim)
-        # X2 = .1 * np.random.rand(5000, dim) + .5 * np.zeros((1, dim))
-        # X3 = .2 * np.random.rand(5000, dim) - .5 * np.zeros((1, dim))
-        # X = np.concatenate([X1, X2, X3], axis=0)
-        #
-        # y = np.ones((X.shape[0], 1))
-        # y[X[:, 0] > .5] = -1
-
-        N_points = self.N_points
-
-        # Example 1: Peak at center, then decay. randn distribution.
-        # X = np.random.randn(N_points, dim)
-        # center = .5 * np.ones((1, dim))
-        # y = 1 / (1 + 5 * np.linalg.norm(X - center, axis=1, keepdims=True))
-        # y = y + 1e-3 * np.random.randn(y.shape[0], 1)
-
-
-        # Example2: Peak at center, then decay. sparse randn distribution and two clusters.
-        X1 = np.random.randn(int(.1*N_points), dim)
-        X2 = .1 * np.random.rand(int(.45*N_points), dim) + .5 * np.ones((1, dim))
-        X3 = .2 * np.random.rand(int(.45*N_points), dim) - .5 * np.ones((1, dim))
-        X = np.concatenate([X1, X2, X3], axis=0)
-
-        center = .5 * np.ones((1, dim))
-        y = 1 / (1 + 5 * np.linalg.norm(X - center, axis=1, keepdims=True))
-        y = y + 1e-3 * np.random.randn(y.shape[0], 1)
-
-        return X, y
-
-
-    def example_mnist(self, name_dataset):
-        """MNIST dataset"""
-
-        X_train, X_test, y_train, y_test = \
-            load_mnist_pair(get_basepath(),
-                            [int(name_dataset[-2]), int(name_dataset[-1])])
-
-        X = np.concatenate([X_train, X_test], axis=0)
-        y = np.concatenate([y_train, y_test], axis=0).reshape(-1, 1)
-
-        return X, y
-
-    def example_uci(self, name_dataset):
-        """Some UCI datasets"""
-
-        # path = '/usr/local/home/wenzeltn/deepkernel-pytorch2/data/UCI/'
-        path = '/home/wenzeltn/local_home/deepkernel-pytorch2/data/UCI/'
-
-        if name_dataset == 'uci_airfoil_self_noise':
-            data = np.loadtxt(path + 'airfoil_self_noise.dat', unpack=True).transpose()
-
-            X = data[:, :5]
-            y = data[:, [5]]
-
-            X = (X - X.mean(axis=0, keepdims=True)) / (X.std(axis=0, keepdims=True) + 1e-30)
-            y = (y - y.mean(axis=0, keepdims=True)) / (y.std(axis=0, keepdims=True) + 1e-30)
-
-        if name_dataset == 'uci_CCPP':
-            import pandas as pd
-            data = pd.read_excel(path + 'CCPP/' + 'Folds5x2_pp.xlsx').to_numpy()
-
-            X = data[:, :4]
-            y = data[:, [4]]
-
-            X = (X - X.mean(axis=0, keepdims=True)) / (X.std(axis=0, keepdims=True) + 1e-30)
-            y = (y - y.mean(axis=0, keepdims=True)) / (y.std(axis=0, keepdims=True) + 1e-30)
-
-        return X, y
-
-    def example_TUD(self):
-        """Felix Döppel data"""
-
-        path = '/home/wenzeltn/local_home/deepkernel-pytorch2/P01_DK_optimization/data/'
-        # path = '/usr/local/home/wenzeltn/deepkernel-pytorch2/P01_DK_optimization/data/'
-        mat_train = spio.loadmat(path + 'DataTUDKinetics.mat', squeeze_me=True)
-        # mat_test = spio.loadmat(path + 'TestTUDKinetics.mat', squeeze_me=True)
-
-        X = mat_train['input']
-        y = mat_train['output'].reshape(-1, 1)
-
-        return X, y
 
     def example_holzmuller(self, ds_name):
        """Data set from David Holzmüller paper of batch active learning."""
 
-        if os.path.exists('/usr/local/home/wenzeltn'):  # anm03
-            path = '/usr/local/home/wenzeltn/deepkernel-pytorch2/data/data_holzmuller/'
-        elif os.path.exists('/usr/local/storage/wenzeltn'):  # ianscluster
-            path = '/usr/local/storage/wenzeltn/deepkernel-pytorch2/data/data_holzmuller/'
-        elif os.path.exists('/home/wenzeltn/'):  # laptop
-            path = '/home/wenzeltn/local_home/deepkernel-pytorch2/data/data_holzmuller/'
+        path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'data')) + '/'
 
         X = np.load(path + ds_name + '/X.npy')
         y = np.load(path + ds_name + '/y.npy')
@@ -199,93 +59,3 @@
 
         return X, y
 
-    def toyexample(self, ds_name):
-        """Toy datasets from Tizian created on 04.01.23."""
-
-        basepath = utilities.get_basepath()
-        path = basepath + 'data/TOY_selfcreated/'
-
-        X = np.load(path + ds_name + 'X.npy')
-        y = np.load(path + ds_name + 'y.npy')
-
-        # Standardization of input according to David H (reduce effect of outliers)
-        X = (X - X.mean(axis=0, keepdims=True)) / (X.std(axis=0, keepdims=True) + 1e-30)
-        X = 5 * np.tanh(0.2 * X)
-
-        y = (y - y.mean(axis=0, keepdims=True)) / (y.std(axis=0, keepdims=True) + 1e-30)
-
-        return X, y
-
-
-    def falkon_datasets(self, ds_name):
-        """Datasets from FALKON paper."""
-
-        path = None
-        if os.path.exists('/usr/local/home/wenzeltn'):  # anm03
-            path = 
'/usr/local/home/wenzeltn/deepkernel-pytorch2/data/FALKON_data/' - elif os.path.exists('/usr/local/storage/wenzeltn'): # ianscluster - path = '/usr/local/storage/wenzeltn/deepkernel-pytorch2/data/FALKON_data/' - elif os.path.exists('/home/wenzeltn/'): - pass - - filename = None - if ds_name == 'susy' or ds_name == 'higgs': - if ds_name == 'susy': - filename = 'Susy.mat' - else: - filename = 'Higgs.mat' - - with h5py.File(path + filename, "r") as h5py_file: - arr = np.asarray(h5py_file['X'], dtype=np.float32).T - X = arr[:, 1:] - y = arr[:, 0].reshape(-1, 1) - - # Preprocess input - mtr = np.mean(X, axis=0, dtype=np.float64, keepdims=True).astype(X.dtype) - vtr = np.var(X, axis=0, dtype=np.float64, ddof=1, keepdims=True).astype(X.dtype) - - X -= mtr - X /= vtr - - # Preprocess outputs - y = y * 2 - 1 - - - elif ds_name == 'timit': # num_train_samples = 1124823 - pass - # ToDo: Does not work, dataset probably damaged - - # filename = 'TIMIT.mat' - # - # with h5py.File(path + filename, 'r') as h5py_file: - # dtype = np.float32 - # Xtr = np.array(h5py_file['Xtr'], dtype=dtype) - # Xts = np.array(h5py_file['Xts'], dtype=dtype) - # Ytr = np.array(h5py_file['Ytr'], dtype=dtype).reshape((-1,)) - # Yts = np.array(h5py_file['Yts'], dtype=dtype).reshape((-1,)) - # X = np.concatenate((Xtr, Xts), axis=0) - # y = np.concatenate((Ytr, Yts), axis=0) - # - # - # # - # # f = spio.loadmat(path + filename) - # # dtype = np.float32 - # # Xtr = np.array(f['Xtr'], dtype=dtype) - # # Xts = np.array(f['Xts'], dtype=dtype) - # # Ytr = np.array(f['Ytr'], dtype=dtype).reshape((-1,)) - # # Yts = np.array(f['Yts'], dtype=dtype).reshape((-1,)) - # # X = np.concatenate((Xtr, Xts), axis=0) - # # y = np.concatenate((Ytr, Yts), axis=0) - - # elif ds_name == 'flights': - # filename = 'flights.csv' - # # Not implemented, because I have a .csv file, Giacomo uses an .hdf5 file - # - # elif ds_name == 'taxi': - # pass - # # Not implemented because super large - - # ToDo: Not implemented: Taxi since very large, flights only in .csv, timit is broken - - return X, y - diff --git a/utils/hyperparameters.py b/utils/hyperparameters.py index 02469eea921fc1cbb321ea65b2946e26ffc01f2e..9cd1ca153e3c915ba8ed3e2710c944eb7d3a83c6 100644 --- a/utils/hyperparameters.py +++ b/utils/hyperparameters.py @@ -22,54 +22,6 @@ class example_func_approx(): k_matern = 0 -class example_2d(example_func_approx): - pass - -class example_10d_vanishing(example_func_approx): - flag_gaussian = True - -# class example_toy(): -# maxIter_vkoga = 250 -# N_points = 10000 -# noise_level = 1e-3 -# -# reg_para_optim = 1e-5 # for kernel optimization -# reg_para_vkoga = 0 # for running VKOGA -# learning_rate = 5e-3 -# n_epochs = 25 -# batch_size = 64 -# n_folds = None -# -# flag_initialize_diagonal = True -# flag_symmetric = False -# flag_gaussian = False -# -# n_cross_val = 10 -# shape_para = 1 -# k_matern = 0 - - -class example_TUD(): - maxIter_vkoga = 500 - N_points = None - noise_level = 0 - - reg_para_optim = 1e-5 # for kernel optimization - reg_para_vkoga = 0 # for running VKOGA - learning_rate = 5e-3 - n_epochs = 50 - batch_size = 64 - n_folds = None - - flag_initialize_diagonal = True - flag_symmetric = False - flag_gaussian = True - - n_cross_val = 10 - shape_para = 1 - k_matern = 0 - - class example_holzmuller(): maxIter_vkoga = 1000 N_points = None @@ -90,48 +42,15 @@ class example_holzmuller(): shape_para = 1 / 5 k_matern = 0 + class example_holzmuller_few_epochs(example_holzmuller): n_epochs = 10 - -class example_mnist(example_holzmuller): - n_epochs = 5 - 
maxIter_vkoga = 200
-
-class example_airfoil(example_holzmuller):
-    n_epochs = 25
-    learning_rate = 2e-2
-    maxIter_vkoga = 500
-    k_matern = 0
-
-class example_CCPP(example_holzmuller):
-    n_epochs = 25
-    learning_rate = 2e-2
-    maxIter_vkoga = 500
-    k_matern = 0
-
-
 
 dic_hyperparams = {
-    'example_2d': example_func_approx(),
-    'example_2d_tiz': example_func_approx(),
-    'example_2d_radial': example_func_approx(),
-    'example_2d_active_1': example_func_approx(),
-    'example_2d_franke': example_func_approx(),
-    'example_3d_active_1': example_func_approx(),
-    'example_3d_active_2': example_func_approx(),
-    'example_10d_active': example_func_approx(),
-    'example_10d_vanishing': example_func_approx(),
-    'example_10d_radial': example_func_approx(),
-    'example_5d_radial': example_func_approx(),
-    'example_5d_active': example_func_approx(),
     'example_5d_faster_conv': example_func_approx(),
     'example_6d_kink': example_func_approx(),
     'example_7d_semiactive': example_func_approx(),
-    'example_winkle': example_func_approx(),
-
-    'example_TUD': example_TUD(),
-
     'ct': example_holzmuller_few_epochs(),
     'diamonds': example_holzmuller_few_epochs(),
@@ -147,17 +66,6 @@ dic_hyperparams = {
     'sarcos': example_holzmuller_few_epochs(),
     'sgemm': example_holzmuller_few_epochs(),
     'stock': example_holzmuller_few_epochs(),
-    'wecs': example_holzmuller_few_epochs(),
-
-    'susy': example_holzmuller_few_epochs(),
-    'higgs': example_holzmuller_few_epochs(),
-
-
-    'example_mnist_01': example_mnist(),
-    'example_mnist_34': example_mnist(),
-
-    'uci_airfoil_self_noise': example_airfoil(),
-    'uci_CCPP': example_CCPP(),
-
+    'wecs': example_holzmuller_few_epochs()
 }
diff --git a/utils/kernels.py b/utils/kernels.py
index 0d223479acc1118563f9635d7d15a996605645f6..fd34de5687ecab26ab73fdb46c20166e7f519147 100644
--- a/utils/kernels.py
+++ b/utils/kernels.py
@@ -5,6 +5,7 @@ from scipy.spatial import distance_matrix
 import numpy as np
 import matplotlib.pyplot as plt
 
+
 # Abstract kernel class
 class Kernel(ABC):
     @abstractmethod
diff --git a/utils/main_function.py b/utils/main_function.py
index ea871ee2a46929fee066d0d6c43bba30f3d75333..79f5010227d41cff7f7bad286c1291548a113dd5 100644
--- a/utils/main_function.py
+++ b/utils/main_function.py
@@ -1,29 +1,25 @@
-from P36_Francesco_Emma.models.optimized_kernel import OptimizedKernel
-from P36_Francesco_Emma.utilities.dataset_collection import Dataset
-from P36_Francesco_Emma.utilities.hyperparameters import dic_hyperparams
+from utils.optimized_kernel import OptimizedKernel
+from utils.dataset_collection import Dataset
+from utils.hyperparameters import dic_hyperparams
 
-from vkoga import tkernels, kernels
+from utils import tkernels, kernels
 
 import torch
 from matplotlib import pyplot as plt
 import numpy as np
 import time
-from sklearn.model_selection import train_test_split
-from vkoga.kernels import Matern
-from vkoga.vkoga import VKOGA
+from utils.vkoga import VKOGA
 import os
 from datetime import datetime
-import utilities
 
 
 ## Some settings
 list_nctrs = [int(np.round(nr)) for nr in np.logspace(np.log(10) / np.log(10), np.log(1000) / np.log(10), 10)]
 
-basepath = utilities.get_basepath()
-path_for_indices = basepath + 'data/'
+path_for_indices = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'data')) + '/'
 
 
 # Main function to run kernel optimization with subsequent vkoga
diff --git a/utils/optimized_kernel.py b/utils/optimized_kernel.py
index 423faf4ed875e5e78c1e508ec15c36d58d26f56d..fb3aae3ba8d7d965c9912dcded4149313f82a317 100644
--- a/utils/optimized_kernel.py
+++ b/utils/optimized_kernel.py
@@ -1,6 +1,6 @@ from torch import nn import torch -from P36_Francesco_Emma.utilities.cv_rippa_ext import compute_cv_loss_via_rippa_ext_2 +from utils.cv_rippa_ext import compute_cv_loss_via_rippa_ext_2 import numpy as np diff --git a/utils/vkoga.py b/utils/vkoga.py index 1731605560da4189f8cb88ab5961f5bf51742a09..5e7fd4bd6c5b6b3a59f24c5d723f56c2fd87dfaa 100644 --- a/utils/vkoga.py +++ b/utils/vkoga.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -from vkoga.kernels import Gaussian +from utils.kernels import Gaussian import numpy as np from sklearn.base import BaseEstimator from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
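
Note on the path handling used throughout this patch: every script resolves its data and results folders relative to the current file via os.path.dirname(__file__) and then builds file names by plain string concatenation (e.g. path_for_results + name_dataset + '.mat'). Since os.path.abspath() normalizes away a trailing separator, one is re-appended explicitly. The following is a minimal, self-contained sketch of that pattern, not code from the repository; the folder name results_5reruns is taken from the patch, while the dataset name 'demo' and the saved array are made up for illustration.

import os

import numpy as np
from scipy import io

# Resolve the results folder relative to this file rather than the current
# working directory, so the script behaves the same wherever it is launched.
# os.path.abspath() drops a trailing separator, hence the explicit '/' that
# the concatenation style below relies on.
path_for_results = os.path.abspath(
    os.path.join(os.path.dirname(__file__), 'results_5reruns')) + '/'
os.makedirs(path_for_results, exist_ok=True)

# Stand-in payload; the actual scripts save the arrays they compute.
io.savemat(path_for_results + 'demo' + '.mat',
           dict(array_concatenate=np.zeros((3, 3))))

An alternative is os.path.join(path_for_results, name_dataset + '.mat'), which sidesteps the trailing-separator subtlety entirely; the concatenation style is kept above only to match the surrounding scripts.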