From 1ecfa7cf325b9d02f322e29a040ec60fca54b827 Mon Sep 17 00:00:00 2001 From: Tizian Wenzel <wenzeltn@nbanm02.mathematik.uni-stuttgart.de> Date: Sat, 10 Jun 2023 18:18:33 +0200 Subject: [PATCH] Update on the download_data.py file to fix broken imports. --- utils_data/download_data.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/utils_data/download_data.py b/utils_data/download_data.py index 79497a0..99f1955 100644 --- a/utils_data/download_data.py +++ b/utils_data/download_data.py @@ -401,13 +401,13 @@ def import_all(): proc = PandasTaskPreprocessor(min_n_samples=30000, max_tvp_samples=200000, max_test_samples=300000, max_one_hot_columns=300) - proc.apply(PandasTask.from_uci('https://archive.ics.uci.edu/ml/machine-learning-databases/00440/sgemm_product_dataset.zip', - ds_name='sgemm', zip_name='sgemm_product_dataset.zip', csv_name='sgemm_product.csv', + proc.apply(PandasTask.from_uci('https://archive.ics.uci.edu/static/public/440/sgemm+gpu+kernel+performance.zip', + ds_name='sgemm', zip_name='sgemm+gpu+kernel+performance.zip', csv_name='sgemm_product.csv', target_col_idxs=[14, 15, 16, 17], use_log_target=True)) - proc.apply(PandasTask.from_uci('https://archive.ics.uci.edu/ml/machine-learning-databases/00206/slice_localization_data.zip', - ds_name='ct', zip_name='slice_localization_data.zip', csv_name='slice_localization_data.csv', + proc.apply(PandasTask.from_uci('https://archive.ics.uci.edu/static/public/206/relative+location+of+ct+slices+on+axial+axis.zip', + ds_name='ct', zip_name='relative+location+of+ct+slices+on+axial+axis.zip', csv_name='slice_localization_data.csv', target_col_idxs=[385], ignore_col_idxs=[0])) - proc.apply(PandasTask.from_uci('https://archive.ics.uci.edu/ml/machine-learning-databases/00221/Reaction%20Network%20(Undirected).data', + proc.apply(PandasTask.from_uci('https://data.world/uci/kegg-metabolic-reaction-network-undirected', ds_name='kegg_undir_uci', zip_name='kegg_undir_uci.csv', csv_name='kegg_undir_uci.csv', target_col_idxs=[26], ignore_col_idxs=[0], has_header=False, continuous_nan_columns=[4])) # only use the Sydney part of the data set (could as well have used Adelaide, Perth or Tasmania) @@ -443,4 +443,4 @@ def import_all(): ignore_columns=['MM263', 'MM256'])) proc.apply(PandasTask.from_openml_dataset_id(dataset_id=42225, ds_name='diamonds', target='price')) proc.apply(PandasTask.from_openml_dataset_id(dataset_id=564, ds_name='fried', target='Y')) - proc.apply(PandasTask.from_openml_dataset_id(dataset_id=42903, ds_name='protein', target='RMSD')) \ No newline at end of file + proc.apply(PandasTask.from_openml_dataset_id(dataset_id=42903, ds_name='protein', target='RMSD')) -- GitLab