Commit d66afd0c authored by Tizian Wenzel

Final updates.

parent 990c1c06
openml==0.13.1
scipy==1.7.3
scikit-learn==1.0.2
requests==2.31.0
pandas==1.3.5
mat4py==0.5.0
dill==0.3.6
matplotlib==3.5.3
@@ -4,9 +4,8 @@
 # on the unit cube" to produce the plots within Figure 2.
 import torch
-from utils.main_function import run_everything, run_cross_validation
-from utils.hyperparameters import dic_hyperparams
+from utils_code.main_function import run_everything, run_cross_validation
+from utils_code.hyperparameters import dic_hyperparams
 import numpy as np
 from matplotlib import pyplot as plt
@@ -50,7 +49,7 @@ array_eps, array_cv_f, array_cv_f_val, _, list_timings_1L = run_cross_validation
 ## Store in matlab for beautiful tikzfigure plots
-path_for_results = os.path.abspath(os.path.join(os.path.dirname(__file__), 'results_5reruns/'))
+path_for_results = os.path.abspath(os.path.join(os.path.dirname(__file__), 'results_5reruns/')) + '/'
 os.makedirs(path_for_results, exist_ok=True)
 io.savemat(path_for_results + name_dataset + '.mat',
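Note on the recurring `+ '/'` change: downstream paths are built by plain string concatenation (`path_for_results + name_dataset + '.mat'`), so without the trailing separator the file name would be glued directly onto the directory name. A minimal sketch of the same construction done separator-free with `os.path.join` (variable names mirror the diff; the surrounding script context is assumed):

import os

# Stand-ins for the script's variables (assumed, for illustration only).
base_dir = os.path.dirname(os.path.abspath(__file__))
path_for_results = os.path.join(base_dir, 'results_5reruns')
name_dataset = 'fried'

# os.path.join inserts the separator itself, so no trailing '/' bookkeeping:
target_file = os.path.join(path_for_results, name_dataset + '.mat')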
@@ -15,10 +15,8 @@ import numpy as np
 np.random.seed(1)
-list_datasets = ['fried', 'sarcos', 'protein', 'ct', 'diamonds',
-                 'stock', 'kegg_undir_uci', 'online_video',
-                 'wecs', 'mlr_knn_rng', 'query_agg_count',
-                 'sgemm', 'road_network', 'methane', 'poker'] #, 'susy', 'higgs']
+list_datasets = ['fried', 'sarcos', 'ct', 'diamonds', 'stock', 'kegg_undir_uci', 'online_video',
+                 'wecs', 'mlr_knn_rng', 'query_agg_count', 'sgemm', 'road_network']
 ## Loop over reruns and datasets
@@ -55,7 +53,7 @@ for idx_indices in [0, 1, 2, 3, 4]:
 ## Store in matlab for beautiful tikzfigure plots
-path_for_results = os.path.abspath(os.path.join(os.path.dirname(__file__), 'results_5reruns/'))
+path_for_results = os.path.abspath(os.path.join(os.path.dirname(__file__), 'results_5reruns/')) + '/'
 os.makedirs(path_for_results, exist_ok=True)
 io.savemat(path_for_results + name_dataset + '_{}'.format(idx_indices) + '.mat',
@@ -9,19 +9,17 @@ from matplotlib import pyplot as plt
 from scipy import io
 import os
 import scipy
 import numpy as np
 import matplotlib
 np.random.seed(1)
 ## Some settings
-list_datasets = ['fried', 'sarcos', 'protein', 'ct', 'diamonds',
-                 'stock', 'kegg_undir_uci', 'online_video',
-                 'wecs', 'mlr_knn_rng', 'query_agg_count',
-                 'sgemm', 'road_network', 'methane', 'poker'] #, 'susy', 'higgs']
+list_datasets = ['fried', 'sarcos', 'ct', 'diamonds', 'stock', 'kegg_undir_uci', 'online_video',
+                 'wecs', 'mlr_knn_rng', 'query_agg_count', 'sgemm', 'road_network']
-path_for_results = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'results_5reruns/'))
+path_for_results = os.path.abspath(os.path.join(os.path.dirname(__file__), 'results_5reruns/')) + '/'
 ## Initialize dictionaries to store several quantities
@@ -82,11 +80,12 @@ indices_sorted = np.argsort(array_ratio)
 ## Print the calculated ratio: this shows when 2L is superior
 for idx_sorted in indices_sorted:
     print('{:20}'.format(list_datasets[idx_sorted]), np.round(array_ratio[idx_sorted], 5))
 indices_sorted = indices_sorted[:-3]  # cut off last three datasets
 ## Visualization of the ratio of singular values compared to the sum of all singular values
-R = np.linspace(0, 1, int(1.0*len(list_datasets)))
+matplotlib.use('TKAgg')
+R = np.linspace(0, 1, int(1.5*len(list_datasets)))
 array_color = plt.cm.hsv(R)
 dic_to_store = {}
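A plausible reading of the new 1.5 factor (an inference, not stated in the commit): `plt.cm.hsv` is a cyclic colormap, so colors sampled at 0 and 1 are both red; sampling a grid 1.5 times as long and using only the first `len(list_datasets)` entries keeps the first and last dataset visually distinct. A standalone sketch:

import numpy as np
from matplotlib import pyplot as plt

n_datasets = 12
# hsv wraps around: hsv(0.0) and hsv(1.0) are (nearly) the same red.
R = np.linspace(0, 1, int(1.5 * n_datasets))
array_color = plt.cm.hsv(R)       # shape (18, 4): one RGBA row per sample
print(array_color[:n_datasets])   # only the first 12 colors would get used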
@@ -106,7 +105,7 @@ for idx_sorted in indices_sorted[::-1]:
     dic_to_store[name_dataset] = array
 plt.legend(list_legend)
 plt.draw()
 ## Store in matlab for beautiful tikzfigure plots
 # io.savemat('dic_singular_value_ratio.mat', dic_to_store)
@@ -9,9 +9,9 @@ import numpy as np
 from scipy.stats import ortho_group  # Requires version 0.18 of scipy
 import pickle
-from utils.dataset_collection import Dataset
-from utils.hyperparameters import dic_hyperparams
-from utils.main_function import run_everything
+from utils_code.dataset_collection import Dataset
+from utils_code.hyperparameters import dic_hyperparams
+from utils_code.main_function import run_everything
 np.random.seed(1)
@@ -20,16 +20,14 @@ np.random.seed(1)
 ## Some settings
 list_nctrs = [int(np.round(nr)) for nr in np.logspace(np.log(10) / np.log(10), np.log(1000) / np.log(10), 10)]
-path_for_results = os.path.abspath(os.path.join(os.path.dirname(__file__), 'results_stability/'))
+path_for_results = os.path.abspath(os.path.join(os.path.dirname(__file__), 'results_stability/')) + '/'
 flag_gaussian = False
 n_reruns = 5
-list_datasets = ['fried', 'sarcos', 'protein', 'ct', 'diamonds',
-                 'stock', 'kegg_undir_uci', 'online_video',
-                 'wecs', 'mlr_knn_rng', 'query_agg_count',
-                 'sgemm', 'road_network', 'methane', 'poker'] #, 'susy', 'higgs']
+list_datasets = ['fried', 'sarcos', 'ct', 'diamonds', 'stock', 'kegg_undir_uci', 'online_video',
+                 'wecs', 'mlr_knn_rng', 'query_agg_count', 'sgemm', 'road_network']
 ## Loop over all datasets
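An aside on the unchanged `list_nctrs` line: `np.log(x) / np.log(10)` is just the base-10 logarithm, so the expression reduces to `np.logspace(1, 3, 10)`, i.e. ten center counts spaced log-uniformly between 10 and 1000. A standalone check:

import numpy as np

# np.log(10)/np.log(10) == 1 and np.log(1000)/np.log(10) == 3,
# so this matches the expression used in the scripts.
list_nctrs = [int(np.round(nr)) for nr in np.logspace(1, 3, 10)]
print(list_nctrs)  # [10, 17, 28, 46, 77, 129, 215, 359, 599, 1000]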
@@ -44,7 +42,7 @@ for idx_dataset, name_dataset in enumerate(list_datasets):
     list_A_start = [np.eye(X.shape[1]) for _ in range(n_reruns)]
     # Go only for a fixed set of indices - do not aim at error bars plot!
-    list_idx = [1, 2, 3, 4]
+    list_idx = [0, 1, 2, 3, 4]
     ## Loop to compute and store all initial and final matrices for the different nfolds optimizations
     dic_results = {}
@@ -60,7 +58,7 @@ for idx_dataset, name_dataset in enumerate(list_datasets):
     ## Run everything for 2L
     for nfold in list_nfolds:
-        A_start, A_optimized, model, model_vkoga1, model_vkoga2, data, _, array_test_rmse_deep = run_everything(
+        A_start, A_optimized, model, model_vkoga1, model_vkoga2, data, _, array_test_rmse_deep, _ = run_everything(
             name_dataset,
             hyperparameter.maxIter_vkoga, hyperparameter.N_points,
             hyperparameter.noise_level, hyperparameter.reg_para_optim, hyperparameter.reg_para_vkoga,
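The only change in this hunk is the unpacking: `run_everything` now returns one additional value, discarded here with a trailing `_`. A minimal stand-in showing the pattern (the stub below is illustrative only; the real return values are those named in the diff):

def run_everything_stub():
    # Stand-in that returns nine values, like the updated call expects.
    return tuple(range(9))

# The trailing '_' swallows the newly added ninth return value.
A_start, A_optimized, model, vkoga1, vkoga2, data, _, rmse_deep, _ = run_everything_stub()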
@@ -19,12 +19,15 @@ np.random.seed(1)
 ## Some settings
 list_nctrs = [int(np.round(nr)) for nr in np.logspace(np.log(10) / np.log(10), np.log(1000) / np.log(10), 10)]
-path_for_results = os.path.abspath(os.path.join(os.path.dirname(__file__), 'results_stability/'))
+path_for_results = os.path.abspath(os.path.join(os.path.dirname(__file__), 'results_stability/')) + '/'
 ## First, collect results from all files in a common dictionary
 dic_results = {}  # dic_results --> name_dataset --> idx_index --> nfold --> [A_start, A_optimized, model_vkoga1.train_hist['f'], array_test_rmse_deep]
 for idx_file, file in enumerate(os.listdir(path_for_results)):
+    if 'README' in file:
+        continue
     idx_index = int(file.split('_')[2])
     name_dataset = ('_'.join(file.split('_')[3:])).split('.')[:-1][0]
     print(name_dataset)
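For readability, here is how that parsing behaves on a hypothetical result file name (the `dic_results_` prefix and `.pkl` suffix are assumptions; only the "two tokens, then rerun index, then dataset name" shape matters):

file = 'dic_results_0_kegg_undir_uci.pkl'  # hypothetical file name

idx_index = int(file.split('_')[2])        # tokens ['dic', 'results', '0', ...] -> 0
name_dataset = ('_'.join(file.split('_')[3:])).split('.')[:-1][0]
print(idx_index, name_dataset)             # 0 kegg_undir_uci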
@@ -157,9 +160,9 @@ for idx_dataset, name_dataset in enumerate(list(dic_results.keys())):
     plt.yscale('log')
-    ## Store in matlab for beautiful tikzfigure plots
-    io.savemat('dic_stabilityalignment_' + name_dataset + '_nfolds.mat', dic_alignment)
-    io.savemat('dic_singvals_' + name_dataset + '_nfolds.mat', dic_singvalsarray)
+    # ## Store in matlab for beautiful tikzfigure plots
+    # io.savemat('dic_stabilityalignment_' + name_dataset + '_nfolds.mat', dic_alignment)
+    # io.savemat('dic_singvals_' + name_dataset + '_nfolds.mat', dic_singvalsarray)
     if idx_dataset > 20:
         break
@@ -46,7 +46,7 @@ class Dataset():
     def example_holzmuller(self, ds_name):
         """Data set from David Holzmüller's paper on batch active learning."""
-        path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'data/data/'))
+        path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'data/data/')) + '/'
         X = np.load(path + ds_name + '/X.npy')
         y = np.load(path + ds_name + '/y.npy')
@@ -19,7 +19,7 @@ from datetime import datetime
 ## Some settings
 list_nctrs = [int(np.round(nr)) for nr in np.logspace(np.log(10) / np.log(10), np.log(1000) / np.log(10), 10)]
-path_for_indices = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'data/'))
+path_for_indices = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'data/')) + '/'
 # Main function to run kernel optimization with subsequent vkoga
 from torch import nn
 import torch
-from utils.cv_rippa_ext import compute_cv_loss_via_rippa_ext_2
+from utils_code.cv_rippa_ext import compute_cv_loss_via_rippa_ext_2
 import numpy as np
@@ -3,7 +3,7 @@ import os
 # This file allows configuring where to save data, results, plots, etc.
 class CustomPaths:
     # path where downloaded data sets will be saved
-    data_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'data/'))
+    data_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'data/')) + '/'
     # path where benchmark results will be saved
     results_path = 'results'