diff --git a/section_4.2_visualize.py b/section_4.2_visualize.py index 457a66f1fa5bb5dd5ff3e74848328c1cd75364d5..3b51c2f696be6e5986764531ace6dbfb77f28222 100644 --- a/section_4.2_visualize.py +++ b/section_4.2_visualize.py @@ -30,10 +30,12 @@ path_for_results = os.path.abspath(os.path.join(os.path.dirname(__file__), 'resu dic_singular_vals = {} dic_accuracy_deep = {} dic_accuracy_beststd = {} +dic_timings = {} for name_dataset in list_datasets: dic_singular_vals[name_dataset] = {} dic_accuracy_deep[name_dataset] = {} dic_accuracy_beststd[name_dataset] = {} + dic_timings[name_dataset] = {'list_timings_1L': [], 'list_timings_2L': []} ## Read all file, perform computations, save results @@ -57,10 +59,13 @@ for idx_file, file in enumerate(os.listdir(path_for_results)): dic_singular_vals[name_dataset][idx_rerun] = ratio - # Compute and store accuracies + # Compute and store accuracies as well as timings dic_accuracy_deep[name_dataset][idx_rerun] = results['array_test_rmse_deep'] dic_accuracy_beststd[name_dataset][idx_rerun] = np.min(results['array_test_rmse_cv'], axis=0) + dic_timings[name_dataset]['list_timings_1L'].append(np.array(results['list_timings_1L'])) + dic_timings[name_dataset]['list_timings_2L'].append(np.array(results['list_timings_2L'])) + ## Calculate ranking where 2L performs best to worst: Use the average mean of improvement for this array_ratio = np.zeros(len(list_datasets)) @@ -78,10 +83,28 @@ indices_sorted = np.argsort(array_ratio) ## Print the calculated ratio: This shows, when 2L is superior +print('List indicating when two-layered kernel optimization is beneficial:') for idx_sorted in indices_sorted: print('{:20}'.format(list_datasets[idx_sorted]), np.round(array_ratio[idx_sorted], 5)) +## Print timing numbers +for idx_dataset, name_dataset in enumerate(list_datasets): + array_timings_1L = np.stack(dic_timings[name_dataset]['list_timings_1L']) + array_timings_2L = np.stack(dic_timings[name_dataset]['list_timings_2L']) + + t_mean_1L = np.mean(array_timings_1L) + t_std_1L = np.std(array_timings_1L) + + t_mean_2L = np.mean(array_timings_2L, axis=0) + t_std_2L = np.std(array_timings_2L, axis=0) + + print('1L runtime: {:6.2f} +- {:5.2f}s. '.format(t_mean_1L, t_std_1L) + + '2L optim: {:6.2f} +- {:5.2f}. '.format(t_mean_2L[0, 0], t_std_2L[0, 0]) + + '2L greedy: {:6.2f} +- {:5.2f}. '.format(t_mean_2L[0, 1], t_std_2L[0, 1]) + + '(' + name_dataset + ')') + + ## Visualization of the ratio of singular values compared to sum of all singular values matplotlib.use('TKAgg')