From 83889b8583548e1a240ef4033b0b4be7edb1e9ea Mon Sep 17 00:00:00 2001
From: Tizian Wenzel <wenzeltn@nbanm02.mathematik.uni-stuttgart.de>
Date: Thu, 8 Jun 2023 12:22:00 +0200
Subject: [PATCH] Use repository-relative paths and utils/ imports; remove unused datasets and dead code.

---
 data/README.md                   |   0
 results_5reruns/README.md        |   1 +
 results_stability/README.md      |   0
 section_4.1_compute_visualize.py |   7 +-
 section_4.2_compute.py           |  10 +-
 section_4.2_visualize.py         |   4 +-
 section_4.3_compute.py           |  12 +-
 section_4.3_visualize.py         |  85 +----------
 utils/dataset_collection.py      | 244 +------------------------------
 utils/hyperparameters.py         |  96 +-----------
 utils/kernels.py                 |   1 +
 utils/main_function.py           |  16 +-
 utils/optimized_kernel.py        |   2 +-
 utils/vkoga.py                   |   2 +-
 14 files changed, 33 insertions(+), 447 deletions(-)
 create mode 100644 data/README.md
 create mode 100644 results_5reruns/README.md
 create mode 100644 results_stability/README.md

diff --git a/data/README.md b/data/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/results_5reruns/README.md b/results_5reruns/README.md
new file mode 100644
index 0000000..7b71923
--- /dev/null
+++ b/results_5reruns/README.md
@@ -0,0 +1 @@
+Folder to store results.
diff --git a/results_stability/README.md b/results_stability/README.md
new file mode 100644
index 0000000..e69de29
diff --git a/section_4.1_compute_visualize.py b/section_4.1_compute_visualize.py
index 3f6dcfd..08af491 100644
--- a/section_4.1_compute_visualize.py
+++ b/section_4.1_compute_visualize.py
@@ -5,8 +5,8 @@
 
 
 import torch
-from P36_Francesco_Emma.utilities.main_function import run_everything, run_cross_validation
-from P36_Francesco_Emma.utilities.hyperparameters import dic_hyperparams
+from utils.main_function import run_everything, run_cross_validation
+from utils.hyperparameters import dic_hyperparams
 
 import numpy as np
 from matplotlib import pyplot as plt
@@ -46,7 +46,8 @@ array_eps, array_cv_f, array_cv_f_val, _, list_timings_1L = run_cross_validation
 
 
 ## Store in matlab for beautiful tikzfigure plots
-path_for_results = os.getcwd() + '/P36_Francesco_Emma/paper_experiments/results/'
+path_for_results = os.path.abspath(os.path.join(os.path.dirname(__file__), 'results_5reruns')) + os.sep
+
 os.makedirs(path_for_results, exist_ok=True)
 io.savemat(path_for_results + name_dataset + '.mat',
            dict(array_concatenate=array_concatenate,
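The hunks above replace the machine-specific `os.getcwd()` prefix with a path derived from `__file__`, so the script writes its `.mat` files into `results_5reruns/` regardless of the working directory. Note that `os.path.abspath` drops a trailing slash, hence the explicit `os.sep`, so that the plain string concatenation in the `io.savemat` call keeps working. A minimal sketch of the pattern and of the round trip through `scipy.io` (the `array_concatenate` key mirrors the call above; the demo payload is made up):

import os
import numpy as np
from scipy import io

# Portable results path, as introduced by this patch.
path_for_results = os.path.abspath(
    os.path.join(os.path.dirname(__file__), 'results_5reruns')) + os.sep
os.makedirs(path_for_results, exist_ok=True)

# Save and reload a stand-in for array_concatenate.
io.savemat(path_for_results + 'demo.mat', dict(array_concatenate=np.eye(3)))
mat = io.loadmat(path_for_results + 'demo.mat')
print(mat['array_concatenate'].shape)  # (3, 3)
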
diff --git a/section_4.2_compute.py b/section_4.2_compute.py
index 150836e..c60aa4a 100644
--- a/section_4.2_compute.py
+++ b/section_4.2_compute.py
@@ -4,12 +4,9 @@
 # sets", especially to produce the results for the Figures 3-5.
 
 
-import torch
-from P36_Francesco_Emma.utilities.main_function import run_everything, run_cross_validation
-from P36_Francesco_Emma.utilities.hyperparameters import dic_hyperparams
+from utils.main_function import run_everything, run_cross_validation
+from utils.hyperparameters import dic_hyperparams
 
-import numpy as np
-from matplotlib import pyplot as plt
 from scipy import io
 import os
 
@@ -54,7 +51,8 @@ for idx_indices in [0, 1, 2, 3, 4]:
 
 
         ## Store in matlab for beautiful tikzfigure plots
-        path_for_results = os.getcwd() + '/P36_Francesco_Emma/paper_experiments/results/'
+        path_for_results = os.path.abspath(os.path.join(os.path.dirname(__file__), 'results_5reruns')) + os.sep
+
         os.makedirs(path_for_results, exist_ok=True)
         io.savemat(path_for_results + name_dataset + '_{}'.format(idx_indices) + '.mat',
                    dict(array_concatenate=array_concatenate,
diff --git a/section_4.2_visualize.py b/section_4.2_visualize.py
index d286d58..1b63fa0 100644
--- a/section_4.2_visualize.py
+++ b/section_4.2_visualize.py
@@ -9,7 +9,6 @@ from matplotlib import pyplot as plt
 from scipy import io
 import os
 import scipy
-import utilities
 
 
 ## Some settings
@@ -18,8 +17,7 @@ list_datasets = ['fried', 'sarcos', 'protein', 'ct', 'diamonds',
                  'wecs', 'mlr_knn_rng', 'query_agg_count',
                  'sgemm', 'road_network', 'methane', 'poker'] #, 'susy', 'higgs']
 
-basepath = utilities.get_basepath()
-path_for_results = basepath + 'P36_Francesco_Emma/paper_experiments/results_5reruns/'
+path_for_results = os.path.abspath(os.path.join(os.path.dirname(__file__), 'results_5reruns')) + os.sep
 
 
 ## Initialize dictionaries to store several quantities
diff --git a/section_4.3_compute.py b/section_4.3_compute.py
index 74e6a3b..eeed379 100644
--- a/section_4.3_compute.py
+++ b/section_4.3_compute.py
@@ -3,25 +3,21 @@
 # Code related to the numerical experiment within Section 4.3. "Stability of the kernel
 # optimization", especially to produce the results for Figure 6.
 
-# Similar to 03_4_stability_investigations.py, but now also running the VKOGA after the kernel optimization.
-# I ran this file on ic6, ic7, ic8 to compute the results which are collected in the folder results_stability.
 
 import os
 import numpy as np
 from scipy.stats import ortho_group  # Requires version 0.18 of scipy
 import pickle
-import utilities
 
-from P36_Francesco_Emma.utilities.dataset_collection import Dataset
-from P36_Francesco_Emma.utilities.hyperparameters import dic_hyperparams
-from P36_Francesco_Emma.utilities.main_function import run_everything
+from utils.dataset_collection import Dataset
+from utils.hyperparameters import dic_hyperparams
+from utils.main_function import run_everything
 
 
 ## Some settings
 list_nctrs = [int(np.round(nr)) for nr in np.logspace(np.log(10) / np.log(10), np.log(1000) / np.log(10), 10)]
 
-basepath = utilities.get_basepath()
-path_for_results = basepath + 'P36_Francesco_Emma/paper_experiments/results_stability/'
+path_for_results = os.path.abspath(os.path.join(os.path.dirname(__file__), 'results_stability')) + os.sep
 
 
 flag_gaussian = False
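
The grid of expansion sizes used across these scripts, `np.logspace(np.log(10) / np.log(10), np.log(1000) / np.log(10), 10)`, is simply `np.logspace(1, 3, 10)`: ten logarithmically spaced center counts between 10 and 1000. A quick check of what `list_nctrs` contains:

import numpy as np

# np.log(10)/np.log(10) == 1 and np.log(1000)/np.log(10) == 3,
# so this is ten log-spaced values from 10**1 to 10**3.
list_nctrs = [int(np.round(nr)) for nr in np.logspace(1, 3, 10)]
print(list_nctrs)  # [10, 17, 28, 46, 77, 129, 215, 359, 599, 1000]
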
diff --git a/section_4.3_visualize.py b/section_4.3_visualize.py
index 2d2ce7d..8eb70a6 100644
--- a/section_4.3_visualize.py
+++ b/section_4.3_visualize.py
@@ -3,9 +3,6 @@
 # Code related to the numerical experiment within Section 4.3. "Stability of the kernel
 # optimization", especially to produce the plots for Figure 6.
 
-# Evaluation and analysis of the results which are obtained from 04_1_stability_investigations.py.
-# Only the wecs dataset is a bit weird: Here I do not understand what is going on:
-# Only the first singular vectors are aligned, the other ones are not!
 
 import os
 from scipy import io
@@ -14,14 +11,12 @@ from scipy.stats import ortho_group  # Requires version 0.18 of scipy
 from matplotlib import pyplot as plt
 import pickle
 import scipy
-import utilities
 
 
 ## Some settings
 list_nctrs = [int(np.round(nr)) for nr in np.logspace(np.log(10) / np.log(10), np.log(1000) / np.log(10), 10)]
 
-basepath = utilities.get_basepath()
-path_for_results = basepath + 'P36_Francesco_Emma/paper_experiments/results_stability/'
+path_for_results = os.path.abspath(os.path.join(os.path.dirname(__file__), 'results_stability')) + os.sep
 
 
 ## First, collect results from all files in a common dictionary
@@ -167,81 +162,3 @@ for idx_dataset, name_dataset in enumerate(list(dic_results.keys())):
         break
 
 
-
-# =============================================================================
-# ============ The following code is meant for some investigations ============
-# =============================================================================
-
-
-
-## Single plot: Compute the distances between the largest eigenspaces. We use nfolds = 64 as reference!
-max_dim = 15  # maximum dimension to which we want to check similarity
-name_dataset = 'wecs'
-
-nfold_ref = 64  # we will use this as a reference model!
-list_idx = [0, 1, 2, 3, 4]
-
-for idx_index in list_idx:
-
-    plt.figure(110 + idx_index)
-    plt.clf()
-    list_legend = []
-
-    for nfold in dic_results[name_dataset][idx_index]:
-        if nfold == nfold_ref:
-            continue  # this is our reference model
-
-        list_similarity = []
-        for dim in range(max_dim):
-            array_angles = scipy.linalg.subspace_angles(dic_singvecs[name_dataset][idx_index][nfold][:, :(dim + 1)],
-                                                        dic_singvecs[name_dataset][idx_index][nfold_ref][:, :(dim + 1)])
-
-            # Maybe modify this measure here? 90 degress means orthogonality!!
-            similarity = array_angles[0]
-
-            list_similarity.append(np.rad2deg(similarity))
-
-        plt.plot(list(range(max_dim)), list_similarity)
-        list_legend.append('{} vs {}'.format(nfold_ref, nfold))
-
-    # energy_captured = np.sum(np.abs(dic_singvals[idx_index][idx_nfold][-10:])) / np.sum(np.abs(dic_singvals[idx_index][idx_nfold]))
-    # plt.title(name_dataset + ': energy captured within {} dims: {:.3f}%'.format(max_dim, 100*energy_captured))
-    plt.title(name_dataset + '(dim={}): Alignment of subspaces'.format(dic_singvals[name_dataset][idx_index][nfold].shape[0]))
-    plt.legend(list_legend)
-
-
-## Single plot: Plot the first few singular vectors
-dic_colors = {0: 'k', 4: 'm', 8: 'g', 16: 'r', 32: 'b', 64: 'y'}
-max_vectors = 3  # maximum dimension to which we want to check similarity
-name_dataset = 'wecs'
-
-list_idx = [0, 1, 2, 3, 4]
-
-for idx_vector in range(max_vectors):
-
-    plt.figure(1000 + idx_vector)
-    plt.clf()
-    list_legend = []
-
-    flag_legend = True
-    for idx_index in list_idx:
-
-        for nfold in dic_results[name_dataset][idx_index]:
-
-            if flag_legend:
-                list_legend.append(nfold)
-
-            # ToDo: Flip if necessary
-            plt.plot(dic_singvecs[name_dataset][idx_index][nfold][:, idx_vector], color=dic_colors[nfold])
-
-
-        flag_legend = False
-
-
-    # energy_captured = np.sum(np.abs(dic_singvals[idx_index][idx_nfold][-10:])) / np.sum(np.abs(dic_singvals[idx_index][idx_nfold]))
-    # plt.title(name_dataset + ': energy captured within {} dims: {:.3f}%'.format(max_dim, 100*energy_captured))
-    plt.title(name_dataset + '(dim={}): Plot of singular vectors'.format(dic_singvals[name_dataset][idx_index][nfold].shape[0]))
-    plt.legend(list_legend)
-
-
-
diff --git a/utils/dataset_collection.py b/utils/dataset_collection.py
index 58a1fab..65b849c 100644
--- a/utils/dataset_collection.py
+++ b/utils/dataset_collection.py
@@ -1,11 +1,5 @@
 import numpy as np
-import pandas as pd
 import os
-import h5py
-import math
-import scipy.io as spio
-from P32_just_interpolate.utilities.utils import load_mnist_pair, get_basepath
-import utilities
 
 class Dataset():
     """
@@ -19,27 +13,10 @@ class Dataset():
         self.y = None
 
         self.dic_dataset = {
-            'example_2d': (lambda x: .02 * (x[:, [0]] + x[:, [1]]) ** 2 + np.sin(2 * math.pi * (x[:, [0]] - x[:, [1]])), 2),
-            'example_2d_tiz': (lambda x: x[:, [0]] + (x[:, [0]] + .1 * x[:, [1]]) ** 2, 2),
-            'example_2d_radial': (lambda x: np.exp(-4 * np.sum((x - .5 * np.ones_like(x)) ** 2, axis=1, keepdims=True)), 2),
-            'example_2d_active_1': (lambda x: np.abs(x[:, [0]] - 2*x[:, [1]]), 2),
-            'example_2d_franke': (lambda x: 0.75 * np.exp(-(9 * x[:, [0]] - 2) ** 2 / 4 - (9 * x[:, [1]]- 2) ** 2 / 4)
-                                            + 0.75 * np.exp(-(9 * x[:, [0]] + 1) ** 2 / 49 - (9 * x[:, [1]] + 1) / 10)
-                                            + 0.5 * np.exp(-(9 * x[:, [0]] - 7) ** 2 / 4 - (9 * x[:, [1]] - 3) ** 2 / 4)
-                                            - 0.2 * np.exp(-(9 * x[:, [0]] - 4) ** 2 - (9 * x[:, [1]] - 7) ** 2), 2),
-            'example_3d_active_1': (lambda x: x[:, [0]] + 2 * x[:, [1]], 3),
-            'example_3d_active_2': (lambda x: (x[:, [0]] + x[:, [1]]) ** 2 + np.sin(2 * math.pi * (x[:, [0]] - x[:, [1]])), 3),
-            'example_10d_active': (lambda x: x[:, [0]] * (x[:, [1]] - x[:, [2]]) ** 3 + 2 * np.sin(math.pi * (x[:, [1]] - x[:, [2]])) - np.exp(-2 * x[:, [2]]), 10),
-            'example_10d_vanishing': (lambda x: x @ ((np.arange(10) + 1.0) ** (-2)).reshape(-1, 1), 10),
-            'example_10d_radial': (lambda x: np.exp(-4 * np.sum((x[:, :5] - .5 * np.ones_like(x[:, :5])) ** 2, axis=1, keepdims=True)), 10),
-            'example_5d_radial': (lambda x: np.exp(-4 * np.sum((x[:, :] - .5 * np.ones_like(x[:, :])) ** 2, axis=1, keepdims=True)), 5),
-            'example_5d_active': (lambda x: (x[:, [1]] - x[:, [2]]), 5),
             'example_5d_faster_conv': (lambda x: np.exp(-4 * np.sum(x[:, :] - .5 * np.ones_like(x[:, :]), axis=1, keepdims=True) ** 2), 5),
             'example_6d_kink': (lambda x: (np.exp(-4 * np.sum((x[:, :] - .5 * np.ones_like(x[:, :])) ** 2, axis=1, keepdims=True)) + 2 * np.abs(x[:, [0]] - .5)), 6),
             'example_7d_semiactive': (lambda x: np.exp(-4 * np.sum((x[:, :] - .5 * np.ones_like(x[:, :])) ** 2, axis=1, keepdims=True))
                                       + np.exp(-9 * np.sum((x[:, :2] - .3 * np.ones_like(x[:, :2])) ** 2, axis=1, keepdims=True)), 7),
-            # 'example_winkle': (lambda x: ((np.sin(x[:, [0]]) * np.exp(x[:, [1]]) + x[:, [2]]) ** ((x[:, [3]] + 1) ** 2))
-            #                              / ((x[:, [3]] + 1) ** 2), 5),
         }
 
 
@@ -52,141 +29,24 @@ class Dataset():
 
             X = np.random.rand(self.N_points, dim)
             y = fcn(X)
-
-        elif name_dataset == 'example_2d_clusters':
-            X, y = self.example_2d_clusters()
-        elif name_dataset[:24] == 'example_highdim_clusters':
-            dim = int(name_dataset.split('_')[-1])
-            X, y = self.example_highdim_clusters(dim)
-        elif name_dataset[:10] == 'toyexample':
-            X, y = self.toyexample(name_dataset)
+        elif name_dataset in ['ct', 'diamonds', 'fried', 'kegg_undir_uci',
+                              'methane', 'mlr_knn_rng', 'online_video', 'poker',
+                              'protein', 'query_agg_count', 'road_network', 'sarcos', 'sgemm',
+                              'stock', 'wecs']:
+            X, y = self.example_holzmuller(name_dataset)
         else:
-            if name_dataset[:13] == 'example_mnist':
-                X, y = self.example_mnist(name_dataset)
-            elif name_dataset == 'example_TUD':
-                X, y = self.example_TUD()
-            elif name_dataset in ['ct', 'diamonds', 'fried', 'kegg_undir_uci',
-                                  'methane', 'mlr_knn_rng', 'online_video', 'poker',
-                                  'protein', 'query_agg_count', 'road_network', 'sarcos', 'sgemm',
-                                  'stock', 'wecs']:
-                X, y = self.example_holzmuller(name_dataset)
-            elif name_dataset in ['flights', 'higgs', 'susy', 'taxi', 'timit']:
-                X, y = self.falkon_datasets(name_dataset)
-            elif name_dataset in ['uci_airfoil_self_noise', 'uci_CCPP']:
-                X, y = self.example_uci(name_dataset)
+            raise ValueError('Unknown dataset: {}'.format(name_dataset))
 
         assert X is not None, 'Bug in get_data!'
         assert y is not None, 'Bug in get_data!'
 
         return X, y
 
-    def example_2d_clusters(self):
-
-        X1 = np.random.randn(100, 2)
-        X2 = .1 * np.random.randn(500, 2) + np.array([.5, .5])
-        X3 = .2 * np.random.randn(500, 2) + np.array([-.5, -.5])
-
-        f_func = lambda x: 1 / (1 + np.abs(x[:, 0] - .5))
-
-        X = np.concatenate([X1, X2, X3], axis=0)
-        y = f_func(X)
-
-        return X, y
-
-    def example_highdim_clusters(self, dim):
-        # In the meeting with Antoine and Giacomo we cam up with a hypothesis when greedy works better.
-
-        # X1 = np.random.rand(1000, dim)
-        # X2 = .1 * np.random.rand(5000, dim) + .5 * np.zeros((1, dim))
-        # X3 = .2 * np.random.rand(5000, dim) - .5 * np.zeros((1, dim))
-        # X = np.concatenate([X1, X2, X3], axis=0)
-        #
-        # y = np.ones((X.shape[0], 1))
-        # y[X[:, 0] > .5] = -1
-
-        N_points = self.N_points
-
-        # Example 1: Peak at center, then decay. randn distribution.
-        # X = np.random.randn(N_points, dim)
-        # center = .5 * np.ones((1, dim))
-        # y = 1 / (1 + 5 * np.linalg.norm(X - center, axis=1, keepdims=True))
-        # y = y + 1e-3 * np.random.randn(y.shape[0], 1)
-
-
-        # Example2: Peak at center, then decay. sparse randn distribution and two clusters.
-        X1 = np.random.randn(int(.1*N_points), dim)
-        X2 = .1 * np.random.rand(int(.45*N_points), dim) + .5 * np.ones((1, dim))
-        X3 = .2 * np.random.rand(int(.45*N_points), dim) - .5 * np.ones((1, dim))
-        X = np.concatenate([X1, X2, X3], axis=0)
-
-        center = .5 * np.ones((1, dim))
-        y = 1 / (1 + 5 * np.linalg.norm(X - center, axis=1, keepdims=True))
-        y = y + 1e-3 * np.random.randn(y.shape[0], 1)
-
-        return X, y
-
-
-    def example_mnist(self, name_dataset):
-        """MNIST dataset"""
-
-        X_train, X_test, y_train, y_test = \
-            load_mnist_pair(get_basepath(),
-                            [int(name_dataset[-2]), int(name_dataset[-1])])
-
-        X = np.concatenate([X_train, X_test], axis=0)
-        y = np.concatenate([y_train, y_test], axis=0).reshape(-1, 1)
-
-        return X, y
-
-    def example_uci(self, name_dataset):
-        """Some UCI datasets"""
-
-        # path = '/usr/local/home/wenzeltn/deepkernel-pytorch2/data/UCI/'
-        path = '/home/wenzeltn/local_home/deepkernel-pytorch2/data/UCI/'
-
-        if name_dataset == 'uci_airfoil_self_noise':
-            data = np.loadtxt(path + 'airfoil_self_noise.dat', unpack=True).transpose()
-
-            X = data[:, :5]
-            y = data[:, [5]]
-
-            X = (X - X.mean(axis=0, keepdims=True)) / (X.std(axis=0, keepdims=True) + 1e-30)
-            y = (y - y.mean(axis=0, keepdims=True)) / (y.std(axis=0, keepdims=True) + 1e-30)
-
-        if name_dataset == 'uci_CCPP':
-            import pandas as pd
-            data = pd.read_excel(path + 'CCPP/' + 'Folds5x2_pp.xlsx').to_numpy()
-
-            X = data[:, :4]
-            y = data[:, [4]]
-
-            X = (X - X.mean(axis=0, keepdims=True)) / (X.std(axis=0, keepdims=True) + 1e-30)
-            y = (y - y.mean(axis=0, keepdims=True)) / (y.std(axis=0, keepdims=True) + 1e-30)
-
-        return X, y
-
-    def example_TUD(self):
-        """Felix Döppel data"""
-
-        path = '/home/wenzeltn/local_home/deepkernel-pytorch2/P01_DK_optimization/data/'
-        # path = '/usr/local/home/wenzeltn/deepkernel-pytorch2/P01_DK_optimization/data/'
-        mat_train = spio.loadmat(path + 'DataTUDKinetics.mat', squeeze_me=True)
-        # mat_test = spio.loadmat(path + 'TestTUDKinetics.mat', squeeze_me=True)
-
-        X = mat_train['input']
-        y = mat_train['output'].reshape(-1, 1)
-
-        return X, y
 
     def example_holzmuller(self, ds_name):
         """Data set from David Holzmüller paper of batch active learning."""
 
-        if os.path.exists('/usr/local/home/wenzeltn'):   # anm03
-            path = '/usr/local/home/wenzeltn/deepkernel-pytorch2/data/data_holzmuller/'
-        elif os.path.exists('/usr/local/storage/wenzeltn'):  # ianscluster
-            path = '/usr/local/storage/wenzeltn/deepkernel-pytorch2/data/data_holzmuller/'
-        elif os.path.exists('/home/wenzeltn/'):     # laptop
-            path = '/home/wenzeltn/local_home/deepkernel-pytorch2/data/data_holzmuller/'
+        path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'data')) + os.sep
 
         X = np.load(path + ds_name + '/X.npy')
         y = np.load(path + ds_name + '/y.npy')
@@ -199,93 +59,3 @@ class Dataset():
 
         return X, y
 
-    def toyexample(self, ds_name):
-        """Toy datasets from Tizian created on 04.01.23."""
-
-        basepath = utilities.get_basepath()
-        path = basepath + 'data/TOY_selfcreated/'
-
-        X = np.load(path + ds_name + 'X.npy')
-        y = np.load(path + ds_name + 'y.npy')
-
-        # Standardization of input according to David H (reduce effect of outliers)
-        X = (X - X.mean(axis=0, keepdims=True)) / (X.std(axis=0, keepdims=True) + 1e-30)
-        X = 5 * np.tanh(0.2 * X)
-
-        y = (y - y.mean(axis=0, keepdims=True)) / (y.std(axis=0, keepdims=True) + 1e-30)
-
-        return X, y
-
-
-    def falkon_datasets(self, ds_name):
-        """Datasets from FALKON paper."""
-
-        path = None
-        if os.path.exists('/usr/local/home/wenzeltn'):   # anm03
-            path = '/usr/local/home/wenzeltn/deepkernel-pytorch2/data/FALKON_data/'
-        elif os.path.exists('/usr/local/storage/wenzeltn'):  # ianscluster
-            path = '/usr/local/storage/wenzeltn/deepkernel-pytorch2/data/FALKON_data/'
-        elif os.path.exists('/home/wenzeltn/'):
-            pass
-
-        filename = None
-        if ds_name == 'susy' or ds_name == 'higgs':
-            if ds_name == 'susy':
-                filename = 'Susy.mat'
-            else:
-                filename = 'Higgs.mat'
-
-            with h5py.File(path + filename, "r") as h5py_file:
-                arr = np.asarray(h5py_file['X'], dtype=np.float32).T
-            X = arr[:, 1:]
-            y = arr[:, 0].reshape(-1, 1)
-
-            # Preprocess input
-            mtr = np.mean(X, axis=0, dtype=np.float64, keepdims=True).astype(X.dtype)
-            vtr = np.var(X, axis=0, dtype=np.float64, ddof=1, keepdims=True).astype(X.dtype)
-
-            X -= mtr
-            X /= vtr
-
-            # Preprocess outputs
-            y = y * 2 - 1
-
-
-        elif ds_name == 'timit':    # num_train_samples = 1124823
-            pass
-            # ToDo: Does not work, dataset probably damaged
-
-            # filename = 'TIMIT.mat'
-            #
-            # with h5py.File(path + filename, 'r') as h5py_file:
-            #     dtype = np.float32
-            #     Xtr = np.array(h5py_file['Xtr'], dtype=dtype)
-            #     Xts = np.array(h5py_file['Xts'], dtype=dtype)
-            #     Ytr = np.array(h5py_file['Ytr'], dtype=dtype).reshape((-1,))
-            #     Yts = np.array(h5py_file['Yts'], dtype=dtype).reshape((-1,))
-            #     X = np.concatenate((Xtr, Xts), axis=0)
-            #     y = np.concatenate((Ytr, Yts), axis=0)
-            #
-            #
-            # #
-            # # f = spio.loadmat(path + filename)
-            # # dtype = np.float32
-            # # Xtr = np.array(f['Xtr'], dtype=dtype)
-            # # Xts = np.array(f['Xts'], dtype=dtype)
-            # # Ytr = np.array(f['Ytr'], dtype=dtype).reshape((-1,))
-            # # Yts = np.array(f['Yts'], dtype=dtype).reshape((-1,))
-            # # X = np.concatenate((Xtr, Xts), axis=0)
-            # # y = np.concatenate((Ytr, Yts), axis=0)
-
-        # elif ds_name == 'flights':
-        #     filename = 'flights.csv'
-        #     # Not implemented, because I have a .csv file, Giacomo uses an .hdf5 file
-        #
-        # elif ds_name == 'taxi':
-        #     pass
-        #     # Not implemented because super large
-
-        # ToDo: Not implemented: Taxi since very large, flights only in .csv, timit is broken
-
-        return X, y
-
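
After this cleanup, `Dataset` serves only the synthetic `example_*` functions and the Holzmüller benchmark sets; the latter are loaded from `data/<name>/X.npy` and `data/<name>/y.npy` relative to the repository root. A hedged usage sketch (the method name `get_data` is inferred from the assertion messages above; the bare constructor call is an assumption):

from utils.dataset_collection import Dataset

# Assumed layout: data/fried/X.npy and data/fried/y.npy in the repo root.
dataset = Dataset()
X, y = dataset.get_data('fried')  # dispatches to example_holzmuller
print(X.shape, y.shape)
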
diff --git a/utils/hyperparameters.py b/utils/hyperparameters.py
index 02469ee..9cd1ca1 100644
--- a/utils/hyperparameters.py
+++ b/utils/hyperparameters.py
@@ -22,54 +22,6 @@ class example_func_approx():
     k_matern = 0
 
 
-class example_2d(example_func_approx):
-    pass
-
-class example_10d_vanishing(example_func_approx):
-    flag_gaussian = True
-
-# class example_toy():
-#     maxIter_vkoga = 250
-#     N_points = 10000
-#     noise_level = 1e-3
-#
-#     reg_para_optim = 1e-5       # for kernel optimization
-#     reg_para_vkoga = 0          # for running VKOGA
-#     learning_rate = 5e-3
-#     n_epochs = 25
-#     batch_size = 64
-#     n_folds = None
-#
-#     flag_initialize_diagonal = True
-#     flag_symmetric = False
-#     flag_gaussian = False
-#
-#     n_cross_val = 10
-#     shape_para = 1
-#     k_matern = 0
-
-
-class example_TUD():
-    maxIter_vkoga = 500
-    N_points = None
-    noise_level = 0
-
-    reg_para_optim = 1e-5       # for kernel optimization
-    reg_para_vkoga = 0          # for running VKOGA
-    learning_rate = 5e-3
-    n_epochs = 50
-    batch_size = 64
-    n_folds = None
-
-    flag_initialize_diagonal = True
-    flag_symmetric = False
-    flag_gaussian = True
-
-    n_cross_val = 10
-    shape_para = 1
-    k_matern = 0
-
-
 class example_holzmuller():
     maxIter_vkoga = 1000
     N_points = None
@@ -90,48 +42,15 @@ class example_holzmuller():
     shape_para = 1 / 5
     k_matern = 0
 
+
 class example_holzmuller_few_epochs(example_holzmuller):
     n_epochs = 10
 
 
-
-class example_mnist(example_holzmuller):
-    n_epochs = 5
-    maxIter_vkoga = 200
-
-class example_airfoil(example_holzmuller):
-    n_epochs = 25
-    learning_rate = 2e-2
-    maxIter_vkoga = 500
-    k_matern = 0
-
-class example_CCPP(example_holzmuller):
-    n_epochs = 25
-    learning_rate = 2e-2
-    maxIter_vkoga = 500
-    k_matern = 0
-
-
-
 dic_hyperparams = {
-    'example_2d': example_func_approx(),
-    'example_2d_tiz': example_func_approx(),
-    'example_2d_radial': example_func_approx(),
-    'example_2d_active_1': example_func_approx(),
-    'example_2d_franke': example_func_approx(),
-    'example_3d_active_1': example_func_approx(),
-    'example_3d_active_2': example_func_approx(),
-    'example_10d_active': example_func_approx(),
-    'example_10d_vanishing': example_func_approx(),
-    'example_10d_radial': example_func_approx(),
-    'example_5d_radial': example_func_approx(),
-    'example_5d_active': example_func_approx(),
     'example_5d_faster_conv': example_func_approx(),
     'example_6d_kink': example_func_approx(),
     'example_7d_semiactive': example_func_approx(),
-    'example_winkle': example_func_approx(),
-    'example_TUD': example_TUD(),
-
 
     'ct': example_holzmuller_few_epochs(),
     'diamonds': example_holzmuller_few_epochs(),
@@ -147,17 +66,6 @@ dic_hyperparams = {
     'sarcos': example_holzmuller_few_epochs(),
     'sgemm': example_holzmuller_few_epochs(),
     'stock': example_holzmuller_few_epochs(),
-    'wecs': example_holzmuller_few_epochs(),
-
-    'susy': example_holzmuller_few_epochs(),
-    'higgs': example_holzmuller_few_epochs(),
-
-
-    'example_mnist_01': example_mnist(),
-    'example_mnist_34': example_mnist(),
-
-    'uci_airfoil_self_noise': example_airfoil(),
-    'uci_CCPP': example_CCPP(),
-
+    'wecs': example_holzmuller_few_epochs()
 }
 
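With the unused entries removed, `dic_hyperparams` maps each remaining data set either to the synthetic-function defaults or to `example_holzmuller_few_epochs`, which inherits the Holzmüller settings and only lowers `n_epochs` to 10. A consumer reads the settings as plain attributes, for example:

from utils.hyperparameters import dic_hyperparams

params = dic_hyperparams['fried']  # an example_holzmuller_few_epochs instance
print(params.maxIter_vkoga)        # 1000, inherited from example_holzmuller
print(params.n_epochs)             # 10, overridden in the subclass
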
diff --git a/utils/kernels.py b/utils/kernels.py
index 0d22347..fd34de5 100644
--- a/utils/kernels.py
+++ b/utils/kernels.py
@@ -5,6 +5,7 @@ from scipy.spatial import distance_matrix
 import numpy as np
 import matplotlib.pyplot as plt
 
+
 # Abstract kernel
 class Kernel(ABC):
     @abstractmethod    
diff --git a/utils/main_function.py b/utils/main_function.py
index ea871ee..79f5010 100644
--- a/utils/main_function.py
+++ b/utils/main_function.py
@@ -1,29 +1,25 @@
 
 
-from P36_Francesco_Emma.models.optimized_kernel import OptimizedKernel
-from P36_Francesco_Emma.utilities.dataset_collection import Dataset
-from P36_Francesco_Emma.utilities.hyperparameters import dic_hyperparams
+from utils.optimized_kernel import OptimizedKernel
+from utils.dataset_collection import Dataset
+from utils.hyperparameters import dic_hyperparams
 
-from vkoga import tkernels, kernels
+from utils import tkernels, kernels
 import torch
 from matplotlib import pyplot as plt
 import numpy as np
 import time
 
-from sklearn.model_selection import train_test_split
-from vkoga.kernels import Matern
-from vkoga.vkoga import VKOGA
+from utils.vkoga import VKOGA
 import os
 
 from datetime import datetime
-import utilities
 
 
 ## Some settings
 list_nctrs = [int(np.round(nr)) for nr in np.logspace(np.log(10) / np.log(10), np.log(1000) / np.log(10), 10)]
 
-basepath = utilities.get_basepath()
-path_for_indices = basepath + 'data/'
+path_for_indices = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'data')) + os.sep
 
 
 # Main function to run kernel optimization with subsequent VKOGA
diff --git a/utils/optimized_kernel.py b/utils/optimized_kernel.py
index 423faf4..fb3aae3 100644
--- a/utils/optimized_kernel.py
+++ b/utils/optimized_kernel.py
@@ -1,6 +1,6 @@
 from torch import nn
 import torch
-from P36_Francesco_Emma.utilities.cv_rippa_ext import compute_cv_loss_via_rippa_ext_2
+from utils.cv_rippa_ext import compute_cv_loss_via_rippa_ext_2
 import numpy as np
 
 
diff --git a/utils/vkoga.py b/utils/vkoga.py
index 1731605..5e7fd4b 100644
--- a/utils/vkoga.py
+++ b/utils/vkoga.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 
-from vkoga.kernels import Gaussian
+from utils.kernels import Gaussian
 import numpy as np
 from sklearn.base import BaseEstimator
 from sklearn.utils.validation import check_X_y, check_array, check_is_fitted
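
`utils/vkoga.py` now takes its default `Gaussian` kernel from the repo-local `utils.kernels` instead of the external `vkoga` package. Since the class builds on `BaseEstimator` and the sklearn validation helpers imported above, it should expose the usual estimator interface; a sketch under that assumption (the bare constructor and the exact `fit`/`predict` signatures are not confirmed by this patch):

import numpy as np
from utils.vkoga import VKOGA

X = np.random.rand(200, 5)
y = np.sin(2 * np.pi * X[:, [0]])

model = VKOGA()           # assumed to default to the Gaussian kernel
model.fit(X, y)           # greedy selection of kernel centers
y_pred = model.predict(X)
print(np.mean((y - y_pred) ** 2))
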
-- 
GitLab