From cee8defa5cd1c9eb960f1aeeb880d3be57cb8c98 Mon Sep 17 00:00:00 2001
From: Julien St-Laurent
Date: Wed, 30 Apr 2025 13:07:07 -0400
Subject: [PATCH 1/2] chore: Update bucket path

---
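Notes (below the --- separator, so not part of the commit message): the public
Graphium artifacts moved from the `graphium-public` GCS bucket to Valence Labs
storage. Only URL prefixes change:

  https://storage.googleapis.com/graphium-public/           -> https://storage.valencelabs.com/graphium/
  https://storage.googleapis.com/datasets-public-research/  -> https://storage.valencelabs.com/datasets-public-research/
  gs://graphium-public/                                     -> https://storage.valencelabs.com/graphium/

An illustrative sketch of the mechanical rewrite plus a sanity check (assumes
GNU sed; the exact commands used to produce this patch are not recorded):

    git grep -lE 'storage\.googleapis\.com|gs://graphium-public' \
      | xargs sed -i \
          -e 's|gs://graphium-public|https://storage.valencelabs.com/graphium|g' \
          -e 's|storage.googleapis.com/graphium-public|storage.valencelabs.com/graphium|g' \
          -e 's|storage.googleapis.com/datasets-public-research|storage.valencelabs.com/datasets-public-research|g'

    # Nothing stale should remain, and the new endpoint should resolve:
    git grep -nE 'storage\.googleapis\.com|gs://graphium-public' || echo "clean"
    wget --spider https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/qm9.csv.gz
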
 .github/workflows/test_ipu.yml                |  6 +++---
 .../choosing_parallelization.ipynb            |  4 ++--
 .../feature_processing/timing_parallel.ipynb  |  4 ++--
 expts/configs/config_gps_10M_pcqm4m.yaml      |  6 +++---
 expts/configs/config_gps_10M_pcqm4m_mod.yaml  |  6 +++---
 expts/configs/config_mpnn_10M_b3lyp.yaml      | 10 ++++-----
 expts/configs/config_mpnn_pcqm4m.yaml         |  6 +++---
 .../loss_metrics_datamodule/l1000_mcf7.yaml   |  4 ++--
 .../loss_metrics_datamodule/l1000_vcap.yaml   |  4 ++--
 .../loss_metrics_datamodule/largemix.yaml     | 20 +++++++++---------
 .../loss_metrics_datamodule/pcba_1328.yaml    |  4 ++--
 .../tasks/loss_metrics_datamodule/pcqm4m.yaml |  6 +++---
 .../loss_metrics_datamodule/pcqm4m_g25.yaml   |  4 ++--
 .../loss_metrics_datamodule/pcqm4m_n4.yaml    |  4 ++--
 .../tasks/loss_metrics_datamodule/toymix.yaml | 12 +++++------
 expts/main_run_get_fingerprints.py            |  2 +-
 .../base_config/large.yaml                    | 20 +++++++++---------
 .../base_config/large_pcba.yaml               | 20 +++++++++---------
 .../base_config/large_pcqm_g25.yaml           | 20 +++++++++---------
 .../base_config/large_pcqm_n4.yaml            | 20 +++++++++---------
 .../base_config/small.yaml                    | 12 +++++------
 .../baseline/config_small_gcn_baseline.yaml   | 12 +++++------
 .../config_classifigression_l1000.yaml        |  8 +++----
 .../config_large_gcn_gpu.yaml                 | 20 +++++++++---------
 .../neurips2023_configs/config_luis_jama.yaml |  4 ++--
 .../config_small_gcn_gpu.yaml                 |  6 +++---
 .../config_large_gcn_mcf7.yaml                |  4 ++--
 .../config_large_gcn_pcba.yaml                |  4 ++--
 .../config_large_gcn_vcap.yaml                |  4 ++--
 .../single_task_gin/config_large_gin_g25.yaml |  4 ++--
 .../config_large_gin_mcf7.yaml                |  4 ++--
 .../single_task_gin/config_large_gin_n4.yaml  |  4 ++--
 .../config_large_gin_pcba.yaml                |  4 ++--
 .../single_task_gin/config_large_gin_pcq.yaml |  8 +++----
 .../config_large_gin_vcap.yaml                |  4 ++--
 .../config_large_gine_g25.yaml                |  4 ++--
 .../config_large_gine_mcf7.yaml               |  4 ++--
 .../config_large_gine_n4.yaml                 |  4 ++--
 .../config_large_gine_pcba.yaml               |  4 ++--
 .../config_large_gine_pcq.yaml                |  8 +++----
 .../config_large_gine_vcap.yaml               |  4 ++--
 graphium/data/utils.py                        |  2 +-
 install_ipu.sh                                |  2 +-
 notebooks/dev-pretrained.ipynb                |  2 +-
 ...g-fingerprints-from-pretrained-model.ipynb |  4 ++--
 profiling/configs_profiling.yaml              |  4 ++--
 profiling/profile_predictor.py                |  2 +-
 tests/config_test_ipu_dataloader.yaml         |  2 +-
 .../config_test_ipu_dataloader_multitask.yaml |  6 +++---
 tests/test_training.py                        |  2 +-
 50 files changed, 169 insertions(+), 169 deletions(-)

diff --git a/.github/workflows/test_ipu.yml b/.github/workflows/test_ipu.yml
index 886c4c2b7..bf27e5e08 100644
--- a/.github/workflows/test_ipu.yml
+++ b/.github/workflows/test_ipu.yml
@@ -47,9 +47,9 @@ jobs:
         python -c "import poptorch"
 
         # Download the datafiles (Total ~ 10Mb - nothing compared to the libraries)
-        wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/ZINC12k.csv.gz
-        wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/Tox21-7k-12-labels.csv.gz
-        wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/qm9.csv.gz
+        wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/ZINC12k.csv.gz
+        wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/Tox21-7k-12-labels.csv.gz
+        wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/qm9.csv.gz
 
         # Install the IPU specific and graphium requirements
 
diff --git a/docs/tutorials/feature_processing/choosing_parallelization.ipynb b/docs/tutorials/feature_processing/choosing_parallelization.ipynb
index 0ab569d57..ef6fcccf9 100644
--- a/docs/tutorials/feature_processing/choosing_parallelization.ipynb
+++ b/docs/tutorials/feature_processing/choosing_parallelization.ipynb
@@ -64,8 +64,8 @@
     "# download from https://raw.githubusercontent.com/aspuru-guzik-group/chemical_vae/master/models/zinc_properties/250k_rndm_zinc_drugs_clean_3.csv\n",
     "# data = pd.read_csv(\"/home/hadim/250k_rndm_zinc_drugs_clean_3.csv\", usecols=[\"smiles\"])\n",
     "\n",
-    "# download from https://storage.googleapis.com/graphium-public/datasets/QM9/norm_qm9.csv\n",
-    "data = pd.read_csv(\"https://storage.googleapis.com/graphium-public/datasets/QM9/norm_qm9.csv\", usecols=[\"smiles\"])"
+    "# download from https://storage.valencelabs.com/graphium/datasets/QM9/norm_qm9.csv\n",
+    "data = pd.read_csv(\"https://storage.valencelabs.com/graphium/datasets/QM9/norm_qm9.csv\", usecols=[\"smiles\"])"
    ]
   },
   {
diff --git a/docs/tutorials/feature_processing/timing_parallel.ipynb b/docs/tutorials/feature_processing/timing_parallel.ipynb
index 477251e71..3191cdb0c 100644
--- a/docs/tutorials/feature_processing/timing_parallel.ipynb
+++ b/docs/tutorials/feature_processing/timing_parallel.ipynb
@@ -65,8 +65,8 @@
     "# download from https://raw.githubusercontent.com/aspuru-guzik-group/chemical_vae/master/models/zinc_properties/250k_rndm_zinc_drugs_clean_3.csv\n",
     "# data = pd.read_csv(\"/home/hadim/250k_rndm_zinc_drugs_clean_3.csv\", usecols=[\"smiles\"])\n",
     "\n",
-    "# download from https://storage.googleapis.com/graphium-public/datasets/QM9/norm_qm9.csv\n",
-    "data = pd.read_csv(\"https://storage.googleapis.com/graphium-public/datasets/QM9/norm_qm9.csv\", usecols=[\"smiles\"])"
+    "# download from https://storage.valencelabs.com/graphium/datasets/QM9/norm_qm9.csv\n",
+    "data = pd.read_csv(\"https://storage.valencelabs.com/graphium/datasets/QM9/norm_qm9.csv\", usecols=[\"smiles\"])"
    ]
   },
   {
diff --git a/expts/configs/config_gps_10M_pcqm4m.yaml b/expts/configs/config_gps_10M_pcqm4m.yaml
index 10faa3b1e..faf97fa3f 100644
--- a/expts/configs/config_gps_10M_pcqm4m.yaml
+++ b/expts/configs/config_gps_10M_pcqm4m.yaml
@@ -66,12 +66,12 @@ datamodule:
       df: null
       task_level: "graph"
      df_path: ~/scratch/data/graphium/data/PCQM4M/pcqm4mv2-20k.csv
-      # wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2-20k.csv
-      # or set path as https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2-20k.csv directly
+      # wget https://storage.valencelabs.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2-20k.csv
+      # or set path as https://storage.valencelabs.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2-20k.csv directly
       smiles_col: "cxsmiles"
       label_cols: ["homo_lumo_gap"]
       # sample_size: 30000 # use sample_size for test
-      # splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt`
+      # splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.valencelabs.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt`
       split_val: 0.1
       split_test: 0.1
 
diff --git a/expts/configs/config_gps_10M_pcqm4m_mod.yaml b/expts/configs/config_gps_10M_pcqm4m_mod.yaml
index e2cdb44c2..763a02432 100644
--- a/expts/configs/config_gps_10M_pcqm4m_mod.yaml
+++ b/expts/configs/config_gps_10M_pcqm4m_mod.yaml
@@ -15,12 +15,12 @@ datamodule:
       df: null
       task_level: "graph"
       df_path: ~/scratch/data/graphium/data/PCQM4M/pcqm4mv2-20k.csv
-      # wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2-20k.csv
-      # or set path as https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2-20k.csv directly
+      # wget https://storage.valencelabs.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2-20k.csv
+      # or set path as https://storage.valencelabs.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2-20k.csv directly
       smiles_col: "cxsmiles"
       label_cols: ["homo_lumo_gap"]
       # sample_size: 30000 # use sample_size for test
-      # splits_path: graphium/data/PCQM4Mv2/split_dict.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict.pt`
+      # splits_path: graphium/data/PCQM4Mv2/split_dict.pt # Download with `wget https://storage.valencelabs.com/datasets-public-research/PCQM4M/cxsmiles/split_dict.pt`
       split_val: 0.1
       split_test: 0.1
 
diff --git a/expts/configs/config_mpnn_10M_b3lyp.yaml b/expts/configs/config_mpnn_10M_b3lyp.yaml
index c385d7689..edb8f28d6 100644
--- a/expts/configs/config_mpnn_10M_b3lyp.yaml
+++ b/expts/configs/config_mpnn_10M_b3lyp.yaml
@@ -67,8 +67,8 @@ datamodule:
       df: null
       task_level: "graph"
       df_path: graphium/data/b3lyp/b3lyp_mini.parquet #graphium/data/b3lyp/b3lyp_mini.parquet
-      # wget https://storage.googleapis.com/graphium-public/datasets/b3lyp/b3lyp_mini.parquet
-      # or set path as https://storage.googleapis.com/graphium-public/datasets/b3lyp/b3lyp_mini.parquet directly
+      # wget https://storage.valencelabs.com/graphium/datasets/b3lyp/b3lyp_mini.parquet
+      # or set path as https://storage.valencelabs.com/graphium/datasets/b3lyp/b3lyp_mini.parquet directly
       smiles_col: "smiles"
       label_cols: ["beta_gap"]
       # sample_size: 30000 # use sample_size for test
@@ -78,12 +78,12 @@ datamodule:
       df: null
       task_level: "graph"
       df_path: graphium/data/b3lyp/b3lyp_mini.parquet #graphium/data/b3lyp/b3lyp_mini.parquet
-      # wget https://storage.googleapis.com/graphium-public/datasets/b3lyp/b3lyp_mini.parquet
-      # or set path as https://storage.googleapis.com/graphium-public/datasets/b3lyp/b3lyp_mini.parquet directly
+      # wget https://storage.valencelabs.com/graphium/datasets/b3lyp/b3lyp_mini.parquet
+      # or set path as https://storage.valencelabs.com/graphium/datasets/b3lyp/b3lyp_mini.parquet directly
       smiles_col: "smiles"
       label_cols: ["alpha_gap"]
       # sample_size: 30000 # use sample_size for test
-      # splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt`
+      # splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.valencelabs.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt`
       split_val: 0.1
       split_test: 0.1
 
diff --git a/expts/configs/config_mpnn_pcqm4m.yaml b/expts/configs/config_mpnn_pcqm4m.yaml
index 9735f9555..912f4bd49 100644
--- a/expts/configs/config_mpnn_pcqm4m.yaml
+++ b/expts/configs/config_mpnn_pcqm4m.yaml
@@ -15,12 +15,12 @@ datamodule:
       df: null
       task_level: "graph"
       df_path: graphium/data/PCQM4M/pcqm4mv2-20k.csv
-      # wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2-20k.csv
-      # or set path as https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2-20k.csv directly
+      # wget https://storage.valencelabs.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2-20k.csv
+      # or set path as https://storage.valencelabs.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2-20k.csv directly
       smiles_col: "cxsmiles"
       label_cols: ["homo_lumo_gap"]
       # sample_size: 6000 # use sample_size for test
-      splits_path: graphium/data/PCQM4Mv2/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt`
+      splits_path: graphium/data/PCQM4Mv2/split_dict_v2.pt # Download with `wget https://storage.valencelabs.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt`
       # graphium/data/PCQM4Mv2/split_dict.pt
       # graphium/data/PCQM4Mv2/pcqm4m_split.csv
       split_names: ["train", "valid", "test-dev"]
diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_mcf7.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_mcf7.yaml
index 43933a7fa..cacbba2b6 100644
--- a/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_mcf7.yaml
+++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_mcf7.yaml
@@ -38,12 +38,12 @@ datamodule:
       l1000_mcf7:
         df: null
         df_path: ../data/graphium/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz
-        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz
+        # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz
         # or set path as the URL directly
         smiles_col: "SMILES"
         label_cols: geneID-* # geneID-* means all columns starting with "geneID-"
         # sample_size: 2000 # use sample_size for test
         task_level: graph
-        splits_path: ../data/graphium/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt`
+        splits_path: ../data/graphium/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt`
         # split_names: [train, val, test_seen]
         epoch_sampling_fraction: 1.0
\ No newline at end of file
diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_vcap.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_vcap.yaml
index 27b89d862..ba77dcf00 100644
--- a/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_vcap.yaml
+++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/l1000_vcap.yaml
@@ -38,12 +38,12 @@ datamodule:
       l1000_vcap:
         df: null
         df_path: ../data/graphium/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz
-        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz
+        # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz
         # or set path as the URL directly
         smiles_col: "SMILES"
         label_cols: geneID-* # geneID-* means all columns starting with "geneID-"
         # sample_size: 2000 # use sample_size for test
         task_level: graph
-        splits_path: ../data/graphium/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt`
+        splits_path: ../data/graphium/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt`
         # split_names: [train, val, test_seen]
         epoch_sampling_fraction: 1.0
\ No newline at end of file
diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/largemix.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/largemix.yaml
index 921960cd1..9bdf0b59b 100644
--- a/expts/hydra-configs/tasks/loss_metrics_datamodule/largemix.yaml
+++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/largemix.yaml
@@ -85,52 +85,52 @@ datamodule:
      l1000_vcap:
        df: null
        df_path: ../data/graphium/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz
-        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz
+        # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz
        # or set path as the URL directly
        smiles_col: "SMILES"
        label_cols: geneID-* # geneID-* means all columns starting with "geneID-"
        # sample_size: 2000 # use sample_size for test
        task_level: graph
-        splits_path: ../data/graphium/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt`
+        splits_path: ../data/graphium/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt`
        # split_names: [train, val, test_seen]
        epoch_sampling_fraction: 1.0
 
      l1000_mcf7:
        df: null
        df_path: ../data/graphium/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz
-        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz
+        # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz
        # or set path as the URL directly
        smiles_col: "SMILES"
        label_cols: geneID-* # geneID-* means all columns starting with "geneID-"
        # sample_size: 2000 # use sample_size for test
        task_level: graph
-        splits_path: ../data/graphium/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt`
+        splits_path: ../data/graphium/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt`
        # split_names: [train, val, test_seen]
        epoch_sampling_fraction: 1.0
 
      pcba_1328:
        df: null
        df_path: ../data/graphium/large-dataset/PCBA_1328_1564k.parquet
-        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet
+        # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet
        # or set path as the URL directly
        smiles_col: "SMILES"
        label_cols: assayID-* # assayID-* means all columns starting with "assayID-"
        # sample_size: 2000 # use sample_size for test
        task_level: graph
-        splits_path: ../data/graphium/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt`
+        splits_path: ../data/graphium/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt`
        # split_names: [train, val, test_seen]
        epoch_sampling_fraction: 1.0
 
      pcqm4m_g25:
        df: null
        df_path: ../data/graphium/large-dataset/PCQM4M_G25_N4.parquet
-        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
+        # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
        # or set path as the URL directly
        smiles_col: "ordered_smiles"
        label_cols: graph_* # graph_* means all columns starting with "graph_"
        # sample_size: 2000 # use sample_size for test
        task_level: graph
-        splits_path: ../data/graphium/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
+        splits_path: ../data/graphium/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
        # split_names: [train, val, test_seen]
        label_normalization:
          normalize_val_test: True
@@ -140,13 +140,13 @@ datamodule:
      pcqm4m_n4:
        df: null
        df_path: ../data/graphium/large-dataset/PCQM4M_G25_N4.parquet
-        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
+        # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
        # or set path as the URL directly
        smiles_col: "ordered_smiles"
        label_cols: node_* # node_* means all columns starting with "node_"
        # sample_size: 2000 # use sample_size for test
        task_level: node
-        splits_path: ../data/graphium/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
+        splits_path: ../data/graphium/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
        # split_names: [train, val, test_seen]
        seed: 42
        label_normalization:
diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/pcba_1328.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/pcba_1328.yaml
index adc3321a0..a002911f8 100644
--- a/expts/hydra-configs/tasks/loss_metrics_datamodule/pcba_1328.yaml
+++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/pcba_1328.yaml
@@ -30,12 +30,12 @@ datamodule:
      pcba_1328:
        df: null
        df_path: ../data/graphium/large-dataset/PCBA_1328_1564k.parquet
-        # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet
+        # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet
        # or set path as the URL directly
        smiles_col: "SMILES"
        label_cols: assayID-* # assayID-* means all columns starting with "assayID-"
        # sample_size: 2000 # use sample_size for test
        task_level: graph
-        splits_path: ../data/graphium/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt`
+        splits_path: ../data/graphium/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt`
        # split_names: [train, val, test_seen]
        epoch_sampling_fraction: 1.0
\ No newline at end of file
diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/pcqm4m.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/pcqm4m.yaml
index d5b302dd1..b3f96a9e6 100644
--- a/expts/hydra-configs/tasks/loss_metrics_datamodule/pcqm4m.yaml
+++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/pcqm4m.yaml
@@ -32,12 +32,12 @@ datamodule:
       df: null
"graph" df_path: graphium/data/PCQM4M/pcqm4mv2.csv - # wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv - # or set path as https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv directly + # wget https://storage.valencelabs.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv + # or set path as https://storage.valencelabs.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2.csv directly smiles_col: "cxsmiles" label_cols: ["homo_lumo_gap"] # sample_size: 8000 # use sample_size for test - splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` + splits_path: graphium/data/PCQM4M/split_dict_v2.pt # Download with `wget https://storage.valencelabs.com/datasets-public-research/PCQM4M/cxsmiles/split_dict_v2.pt` split_names: ["train", "valid", "test-dev"] # graphium/data/PCQM4Mv2/split_dict.pt # graphium/data/PCQM4Mv2/pcqm4m_split.csv diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/pcqm4m_g25.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/pcqm4m_g25.yaml index 047701f6e..bc094053d 100644 --- a/expts/hydra-configs/tasks/loss_metrics_datamodule/pcqm4m_g25.yaml +++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/pcqm4m_g25.yaml @@ -32,13 +32,13 @@ datamodule: pcqm4m_g25: df: null df_path: ../data/graphium/large-dataset/PCQM4M_G25_N4.parquet - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet # or set path as the URL directly smiles_col: "ordered_smiles" label_cols: graph_* # graph_* means all columns starting with "graph_" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: ../data/graphium/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + splits_path: ../data/graphium/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` # split_names: [train, val, test_seen] label_normalization: normalize_val_test: True diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/pcqm4m_n4.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/pcqm4m_n4.yaml index 494843464..0ab86459b 100644 --- a/expts/hydra-configs/tasks/loss_metrics_datamodule/pcqm4m_n4.yaml +++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/pcqm4m_n4.yaml @@ -30,13 +30,13 @@ datamodule: pcqm4m_n4: df: null df_path: ../data/graphium/large-dataset/PCQM4M_G25_N4.parquet - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet # or set path as the URL directly smiles_col: "ordered_smiles" label_cols: node_* # node_* means all columns starting with "node_" # sample_size: 2000 # use sample_size for test task_level: node - splits_path: ../data/graphium/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + splits_path: ../data/graphium/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with 
`wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` # split_names: [train, val, test_seen] seed: 42 label_normalization: diff --git a/expts/hydra-configs/tasks/loss_metrics_datamodule/toymix.yaml b/expts/hydra-configs/tasks/loss_metrics_datamodule/toymix.yaml index 9ac744a52..ce9017a94 100644 --- a/expts/hydra-configs/tasks/loss_metrics_datamodule/toymix.yaml +++ b/expts/hydra-configs/tasks/loss_metrics_datamodule/toymix.yaml @@ -62,12 +62,12 @@ datamodule: qm9: df: null df_path: ${constants.data_dir}/qm9.csv.gz - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/qm9.csv.gz + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/qm9.csv.gz # or set path as the URL directly smiles_col: "smiles" label_cols: ["A", "B", "C", "mu", "alpha", "homo", "lumo", "gap", "r2", "zpve", "u0", "u298", "h298", "g298", "cv", "u0_atom", "u298_atom", "h298_atom", "g298_atom"] # sample_size: 2000 # use sample_size for test - splits_path: ${constants.data_dir}/qm9_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/qm9_random_splits.pt` + splits_path: ${constants.data_dir}/qm9_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/qm9_random_splits.pt` seed: ${constants.seed} #*seed task_level: graph label_normalization: @@ -77,24 +77,24 @@ datamodule: tox21: df: null df_path: ${constants.data_dir}/Tox21-7k-12-labels.csv.gz - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/Tox21-7k-12-labels.csv.gz + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/Tox21-7k-12-labels.csv.gz # or set path as the URL directly smiles_col: "smiles" label_cols: ["NR-AR", "NR-AR-LBD", "NR-AhR", "NR-Aromatase", "NR-ER", "NR-ER-LBD", "NR-PPAR-gamma", "SR-ARE", "SR-ATAD5", "SR-HSE", "SR-MMP", "SR-p53"] # sample_size: 2000 # use sample_size for test - splits_path: ${constants.data_dir}/Tox21_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/Tox21_random_splits.pt` + splits_path: ${constants.data_dir}/Tox21_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/Tox21_random_splits.pt` seed: ${constants.seed} task_level: graph zinc: df: null df_path: ${constants.data_dir}/ZINC12k.csv.gz - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/ZINC12k.csv.gz + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/ZINC12k.csv.gz # or set path as the URL directly smiles_col: "smiles" label_cols: ["SA", "logp", "score"] # sample_size: 2000 # use sample_size for test - splits_path: ${constants.data_dir}/ZINC12k_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/ZINC12k_random_splits.pt` + splits_path: ${constants.data_dir}/ZINC12k_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/ZINC12k_random_splits.pt` seed: ${constants.seed} task_level: graph label_normalization: diff --git a/expts/main_run_get_fingerprints.py b/expts/main_run_get_fingerprints.py index 94d7f066e..a09d7dc93 100644 --- a/expts/main_run_get_fingerprints.py +++ b/expts/main_run_get_fingerprints.py @@ -27,7 +27,7 @@ 
@@ -27,7 +27,7 @@ def main() -> None:
     LIST_CONCAT_LAST_LAYERS = [1, 0, [1, 2], [0, 1, 2]]
     DATA_NAME_ALL = ["molbace"] # , "mollipo", "moltox21", "molHIV"]
 
-    MODEL_PATH = "gs://graphium-public/pretrained-models"
+    MODEL_PATH = "https://storage.valencelabs.com/graphium/pretrained-models"
     MODEL_NAME = "graphium-zinc-micro-dummy-test"
     MODEL_FILE = f"{MODEL_PATH}/{MODEL_NAME}/model.ckpt"
     MODEL_CONFIG = f"{MODEL_PATH}/{MODEL_NAME}/configs.yaml"
diff --git a/expts/neurips2023_configs/base_config/large.yaml b/expts/neurips2023_configs/base_config/large.yaml
index 8a836f368..0c5512a12 100644
--- a/expts/neurips2023_configs/base_config/large.yaml
+++ b/expts/neurips2023_configs/base_config/large.yaml
@@ -68,49 +68,49 @@ datamodule:
     l1000_vcap:
       df: null
      df_path: graphium/data/neurips2023/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz
-      # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz
+      # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz
       # or set path as the URL directly
       smiles_col: "SMILES"
       label_cols: geneID-* # geneID-* means all columns starting with "geneID-"
       # sample_size: 2000 # use sample_size for test
       task_level: graph
-      splits_path: graphium/data/neurips2023/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt`
+      splits_path: graphium/data/neurips2023/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt`
       epoch_sampling_fraction: 1.0
 
     l1000_mcf7:
       df: null
       df_path: graphium/data/neurips2023/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz
-      # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz
+      # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz
       # or set path as the URL directly
       smiles_col: "SMILES"
       label_cols: geneID-* # geneID-* means all columns starting with "geneID-"
       # sample_size: 2000 # use sample_size for test
       task_level: graph
-      splits_path: graphium/data/neurips2023/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt`
+      splits_path: graphium/data/neurips2023/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt`
       epoch_sampling_fraction: 1.0
 
     pcba_1328:
       df: null
       df_path: graphium/data/neurips2023/large-dataset/PCBA_1328_1564k.parquet
-      # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet
+      # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet
       # or set path as the URL directly
       smiles_col: "SMILES"
       label_cols: assayID-* # assayID-* means all columns starting with "assayID-"
       # sample_size: 2000 # use sample_size for test
       task_level: graph
-      splits_path: graphium/data/neurips2023/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt`
+      splits_path: graphium/data/neurips2023/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt`
       epoch_sampling_fraction: 1.0
 
     pcqm4m_g25:
       df: null
       df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet
-      # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
+      # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
       # or set path as the URL directly
       smiles_col: "ordered_smiles"
       label_cols: graph_* # graph_* means all columns starting with "graph_"
       # sample_size: 2000 # use sample_size for test
       task_level: graph
-      splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
+      splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
       label_normalization:
         normalize_val_test: True
         method: "normal"
@@ -119,13 +119,13 @@ datamodule:
     pcqm4m_n4:
       df: null
       df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet
-      # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
+      # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
       # or set path as the URL directly
       smiles_col: "ordered_smiles"
       label_cols: node_* # node_* means all columns starting with "node_"
       # sample_size: 2000 # use sample_size for test
       task_level: node
-      splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
+      splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
       seed: 42
       label_normalization:
         normalize_val_test: True
diff --git a/expts/neurips2023_configs/base_config/large_pcba.yaml b/expts/neurips2023_configs/base_config/large_pcba.yaml
index f90675e73..f32f00391 100644
--- a/expts/neurips2023_configs/base_config/large_pcba.yaml
+++ b/expts/neurips2023_configs/base_config/large_pcba.yaml
@@ -67,49 +67,49 @@ datamodule:
     task_specific_args: # To be replaced by a new class "DatasetParams"
     #   df_path: graphium/data/neurips2023/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz
-    #   # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz
+    #   # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz
     #   # or set path as the URL directly
     #   smiles_col: "SMILES"
     #   label_cols: geneID-* # geneID-* means all columns starting with "geneID-"
     #   # sample_size: 2000 # use sample_size for test
     #   task_level: graph
-    #   splits_path: graphium/data/neurips2023/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt`
+    #   splits_path: graphium/data/neurips2023/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt`
     #   epoch_sampling_fraction: 1.0
 
     # l1000_mcf7:
     #   df: null
     #   df_path: graphium/data/neurips2023/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz
-    #   # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz
+    #   # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz
     #   # or set path as the URL directly
     #   smiles_col: "SMILES"
     #   label_cols: geneID-* # geneID-* means all columns starting with "geneID-"
     #   # sample_size: 2000 # use sample_size for test
     #   task_level: graph
-    #   splits_path: graphium/data/neurips2023/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt`
+    #   splits_path: graphium/data/neurips2023/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt`
     #   epoch_sampling_fraction: 1.0
 
     pcba_1328:
       df: null
       df_path: graphium/data/neurips2023/large-dataset/PCBA_1328_1564k.parquet
-      # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet
+      # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet
       # or set path as the URL directly
       smiles_col: "SMILES"
       label_cols: assayID-* # assayID-* means all columns starting with "assayID-"
       # sample_size: 2000 # use sample_size for test
       task_level: graph
-      splits_path: graphium/data/neurips2023/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt`
+      splits_path: graphium/data/neurips2023/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt`
       epoch_sampling_fraction: 1.0
 
     # pcqm4m_g25:
     #   df: null
     #   df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet
-    #   # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
+    #   # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
     #   # or set path as the URL directly
     #   smiles_col: "ordered_smiles"
     #   label_cols: graph_* # graph_* means all columns starting with "graph_"
     #   # sample_size: 2000 # use sample_size for test
     #   task_level: graph
-    #   splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
+    #   splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
     #   label_normalization:
     #     normalize_val_test: True
     #     method: "normal"
@@ -118,13 +118,13 @@ datamodule:
     # pcqm4m_n4:
       #df: null
       #df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet
-      ## wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
+      ## wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
       ## or set path as the URL directly
       #smiles_col: "ordered_smiles"
       #label_cols: node_* # node_* means all columns starting with "node_"
with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt` + # splits_path: graphium/data/neurips2023/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt` # epoch_sampling_fraction: 1.0 pcqm4m_g25: df: null df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet # or set path as the URL directly smiles_col: "ordered_smiles" label_cols: graph_* # graph_* means all columns starting with "graph_" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` label_normalization: normalize_val_test: True method: "normal" @@ -118,13 +118,13 @@ datamodule: # pcqm4m_n4: # df: null # df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet - # # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet + # # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet # # or set path as the URL directly # smiles_col: "ordered_smiles" # label_cols: node_* # node_* means all columns starting with "node_" # # sample_size: 2000 # use sample_size for test # task_level: node - # splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + # splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` # seed: 42 # label_normalization: # normalize_val_test: True diff --git a/expts/neurips2023_configs/base_config/large_pcqm_n4.yaml b/expts/neurips2023_configs/base_config/large_pcqm_n4.yaml index f9a9e58b8..c959ee9d1 100644 --- a/expts/neurips2023_configs/base_config/large_pcqm_n4.yaml +++ b/expts/neurips2023_configs/base_config/large_pcqm_n4.yaml @@ -67,49 +67,49 @@ datamodule: task_specific_args: # To be replaced by a new class "DatasetParams" # df_path: graphium/data/neurips2023/large-dataset/LINCS_L1000_VCAP_0-2_th2.csv.gz - # # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz + # # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz # # or set path as the URL directly # smiles_col: "SMILES" # label_cols: geneID-* # geneID-* means all columns starting with "geneID-" # # sample_size: 2000 # use sample_size for test # task_level: graph - # splits_path: graphium/data/neurips2023/large-dataset/l1000_vcap_random_splits.pt # Download with `wget 
https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt` + # splits_path: graphium/data/neurips2023/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt` # epoch_sampling_fraction: 1.0 # l1000_mcf7: # df: null # df_path: graphium/data/neurips2023/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz - # # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz + # # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz # # or set path as the URL directly # smiles_col: "SMILES" # label_cols: geneID-* # geneID-* means all columns starting with "geneID-" # # sample_size: 2000 # use sample_size for test # task_level: graph - # splits_path: graphium/data/neurips2023/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt` + # splits_path: graphium/data/neurips2023/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt` # epoch_sampling_fraction: 1.0 # pcba_1328: # df: null # df_path: graphium/data/neurips2023/large-dataset/PCBA_1328_1564k.parquet - # # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet + # # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet # # or set path as the URL directly # smiles_col: "SMILES" # label_cols: assayID-* # assayID-* means all columns starting with "assayID-" # # sample_size: 2000 # use sample_size for test # task_level: graph - # splits_path: graphium/data/neurips2023/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt` + # splits_path: graphium/data/neurips2023/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt` # epoch_sampling_fraction: 1.0 # pcqm4m_g25: # df: null # df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet - # # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet + # # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet # # or set path as the URL directly # smiles_col: "ordered_smiles" # label_cols: graph_* # graph_* means all columns starting with "graph_" # # sample_size: 2000 # use sample_size for test # task_level: graph - # splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + # splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` # label_normalization: # normalize_val_test: True # method: "normal" @@ -118,13 +118,13 @@ datamodule: pcqm4m_n4: df: null df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet - # wget 
-    #   splits_path: graphium/data/neurips2023/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt`
+    #   splits_path: graphium/data/neurips2023/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt`
     #   epoch_sampling_fraction: 1.0
 
     # l1000_mcf7:
     #   df: null
     #   df_path: graphium/data/neurips2023/large-dataset/LINCS_L1000_MCF7_0-2_th2.csv.gz
-    #   # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz
+    #   # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz
     #   # or set path as the URL directly
     #   smiles_col: "SMILES"
     #   label_cols: geneID-* # geneID-* means all columns starting with "geneID-"
     #   # sample_size: 2000 # use sample_size for test
     #   task_level: graph
-    #   splits_path: graphium/data/neurips2023/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt`
+    #   splits_path: graphium/data/neurips2023/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt`
     #   epoch_sampling_fraction: 1.0
 
     # pcba_1328:
     #   df: null
     #   df_path: graphium/data/neurips2023/large-dataset/PCBA_1328_1564k.parquet
-    #   # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet
+    #   # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet
     #   # or set path as the URL directly
     #   smiles_col: "SMILES"
     #   label_cols: assayID-* # assayID-* means all columns starting with "assayID-"
     #   # sample_size: 2000 # use sample_size for test
     #   task_level: graph
-    #   splits_path: graphium/data/neurips2023/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt`
+    #   splits_path: graphium/data/neurips2023/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt`
     #   epoch_sampling_fraction: 1.0
 
     # pcqm4m_g25:
     #   df: null
     #   df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet
-    #   # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
+    #   # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
     #   # or set path as the URL directly
     #   smiles_col: "ordered_smiles"
     #   label_cols: graph_* # graph_* means all columns starting with "graph_"
     #   # sample_size: 2000 # use sample_size for test
     #   task_level: graph
-    #   splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
+    #   splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
     #   label_normalization:
     #     normalize_val_test: True
     #     method: "normal"
@@ -118,13 +118,13 @@ datamodule:
     pcqm4m_n4:
       df: null
      df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet
-      # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
+      # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
       # or set path as the URL directly
       smiles_col: "ordered_smiles"
       label_cols: node_* # node_* means all columns starting with "node_"
       # sample_size: 2000 # use sample_size for test
       task_level: node
-      splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
+      splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
       seed: 42
       label_normalization:
         normalize_val_test: True
diff --git a/expts/neurips2023_configs/base_config/small.yaml b/expts/neurips2023_configs/base_config/small.yaml
index fd7ce3fbe..652da4198 100644
--- a/expts/neurips2023_configs/base_config/small.yaml
+++ b/expts/neurips2023_configs/base_config/small.yaml
@@ -57,12 +57,12 @@ datamodule:
     qm9:
      df: null
      df_path: data/neurips2023/small-dataset/qm9.csv.gz
-      # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/qm9.csv.gz
+      # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/qm9.csv.gz
       # or set path as the URL directly
       smiles_col: "smiles"
       label_cols: ["A", "B", "C", "mu", "alpha", "homo", "lumo", "gap", "r2", "zpve", "u0", "u298", "h298", "g298", "cv", "u0_atom", "u298_atom", "h298_atom", "g298_atom"]
       # sample_size: 2000 # use sample_size for test
-      splits_path: data/neurips2023/small-dataset/qm9_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/qm9_random_splits.pt`
+      splits_path: data/neurips2023/small-dataset/qm9_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/qm9_random_splits.pt`
       seed: *seed
       task_level: graph
       label_normalization:
@@ -72,24 +72,24 @@ datamodule:
     tox21:
       df: null
       df_path: data/neurips2023/small-dataset/Tox21-7k-12-labels.csv.gz
-      # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/Tox21-7k-12-labels.csv.gz
+      # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/Tox21-7k-12-labels.csv.gz
       # or set path as the URL directly
       smiles_col: "smiles"
       label_cols: ["NR-AR", "NR-AR-LBD", "NR-AhR", "NR-Aromatase", "NR-ER", "NR-ER-LBD", "NR-PPAR-gamma", "SR-ARE", "SR-ATAD5", "SR-HSE", "SR-MMP", "SR-p53"]
       # sample_size: 2000 # use sample_size for test
-      splits_path: data/neurips2023/small-dataset/Tox21_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/Tox21_random_splits.pt`
+      splits_path: data/neurips2023/small-dataset/Tox21_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/Tox21_random_splits.pt`
       seed: *seed
       task_level: graph
 
     zinc:
       df: null
       df_path: data/neurips2023/small-dataset/ZINC12k.csv.gz
-      # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/ZINC12k.csv.gz
+      # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/ZINC12k.csv.gz
       # or set path as the URL directly
       smiles_col: "smiles"
       label_cols: ["SA", "logp", "score"]
       # sample_size: 2000 # use sample_size for test
-      splits_path: data/neurips2023/small-dataset/ZINC12k_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/ZINC12k_random_splits.pt`
+      splits_path: data/neurips2023/small-dataset/ZINC12k_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/ZINC12k_random_splits.pt`
       seed: *seed
       task_level: graph
       label_normalization:
diff --git a/expts/neurips2023_configs/baseline/config_small_gcn_baseline.yaml b/expts/neurips2023_configs/baseline/config_small_gcn_baseline.yaml
index 7b2d2cbdf..2f6ca95c7 100644
--- a/expts/neurips2023_configs/baseline/config_small_gcn_baseline.yaml
+++ b/expts/neurips2023_configs/baseline/config_small_gcn_baseline.yaml
@@ -56,12 +56,12 @@ datamodule:
     qm9:
      df: null
      df_path: data/neurips2023/small-dataset/qm9.csv.gz
-      # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/qm9.csv.gz
+      # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/qm9.csv.gz
       # or set path as the URL directly
       smiles_col: "smiles"
       label_cols: ["A", "B", "C", "mu", "alpha", "homo", "lumo", "gap", "r2", "zpve", "u0", "u298", "h298", "g298", "cv", "u0_atom", "u298_atom", "h298_atom", "g298_atom"]
       # sample_size: 2000 # use sample_size for test
-      splits_path: data/neurips2023/small-dataset/qm9_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/qm9_random_splits.pt`
+      splits_path: data/neurips2023/small-dataset/qm9_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/qm9_random_splits.pt`
       seed: *seed
       task_level: graph
       label_normalization:
@@ -71,24 +71,24 @@ datamodule:
     tox21:
       df: null
       df_path: data/neurips2023/small-dataset/Tox21-7k-12-labels.csv.gz
-      # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/Tox21-7k-12-labels.csv.gz
+      # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/Tox21-7k-12-labels.csv.gz
       # or set path as the URL directly
       smiles_col: "smiles"
       label_cols: ["NR-AR", "NR-AR-LBD", "NR-AhR", "NR-Aromatase", "NR-ER", "NR-ER-LBD", "NR-PPAR-gamma", "SR-ARE", "SR-ATAD5", "SR-HSE", "SR-MMP", "SR-p53"]
       # sample_size: 2000 # use sample_size for test
-      splits_path: data/neurips2023/small-dataset/Tox21_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/Tox21_random_splits.pt`
+      splits_path: data/neurips2023/small-dataset/Tox21_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/Tox21_random_splits.pt`
       seed: *seed
       task_level: graph
 
     zinc:
       df: null
       df_path: data/neurips2023/small-dataset/ZINC12k.csv.gz
-      # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/ZINC12k.csv.gz
+      # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/ZINC12k.csv.gz
       # or set path as the URL directly
       smiles_col: "smiles"
       label_cols: ["SA", "logp", "score"]
       # sample_size: 2000 # use sample_size for test
-      splits_path: data/neurips2023/small-dataset/ZINC12k_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/ZINC12k_random_splits.pt`
+      splits_path: data/neurips2023/small-dataset/ZINC12k_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/ZINC12k_random_splits.pt`
       seed: *seed
       task_level: graph
       label_normalization:
diff --git a/expts/neurips2023_configs/config_classifigression_l1000.yaml b/expts/neurips2023_configs/config_classifigression_l1000.yaml
index 48f06d9d1..a3241be9d 100644
--- a/expts/neurips2023_configs/config_classifigression_l1000.yaml
+++ b/expts/neurips2023_configs/config_classifigression_l1000.yaml
@@ -56,24 +56,24 @@ datamodule:
     l1000_vcap:
       df: null
      df_path: graphium/data/neurips2023/large-dataset/LINCS_L1000_VCAP_0-4.csv.gz
-      # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz
+      # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz
       # or set path as the URL directly
       smiles_col: "SMILES"
       label_cols: geneID-* # geneID-* means all columns starting with "geneID-"
       # sample_size: 2000 # use sample_size for test
       task_level: graph
-      splits_path: graphium/data/neurips2023/small-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt`
+      splits_path: graphium/data/neurips2023/small-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt`
 
     l1000_mcf7:
       df: null
       df_path: graphium/data/neurips2023/large-dataset/LINCS_L1000_MCF7_0-4.csv.gz
-      # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz
+      # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz
       # or set path as the URL directly
       smiles_col: "SMILES"
       label_cols: geneID-* # geneID-* means all columns starting with "geneID-"
       # sample_size: 2000 # use sample_size for test
       task_level: graph
-      splits_path: graphium/data/neurips2023/small-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt`
+      splits_path: graphium/data/neurips2023/small-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt`
 
   # Featurization
   prepare_dict_or_graph: pyg:graph
diff --git a/expts/neurips2023_configs/config_large_gcn_gpu.yaml b/expts/neurips2023_configs/config_large_gcn_gpu.yaml
index 2830530aa..ae9eb3bc0 100644
--- a/expts/neurips2023_configs/config_large_gcn_gpu.yaml
+++ b/expts/neurips2023_configs/config_large_gcn_gpu.yaml
@@ -30,24 +30,24 @@ datamodule:
   args:
     task_specific_args:
       l1000_vcap:
-        df_path: expts/data/neurips2023/large-dataset/LINCS_L1000_VCAP_0-4.csv.gz # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz
-        splits_path: expts/data/neurips2023/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt`
+        df_path: expts/data/neurips2023/large-dataset/LINCS_L1000_VCAP_0-4.csv.gz # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz
+        splits_path: expts/data/neurips2023/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt`
 
       l1000_mcf7:
-        df_path: expts/data/neurips2023/large-dataset/LINCS_L1000_MCF7_0-4.csv.gz # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz
-        splits_path: expts/data/neurips2023/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt`
+        df_path: expts/data/neurips2023/large-dataset/LINCS_L1000_MCF7_0-4.csv.gz # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz
+        splits_path: expts/data/neurips2023/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt`
 
      pcba_1328:
-        df_path: expts/data/neurips2023/large-dataset/PCBA_1328_1564k.parquet # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet
-        splits_path: expts/data/neurips2023/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt`
+        df_path: expts/data/neurips2023/large-dataset/PCBA_1328_1564k.parquet # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet
+        splits_path: expts/data/neurips2023/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt`
 
      pcqm4m_g25:
-        df_path: expts/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
-        splits_path: expts/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
+        df_path: expts/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
+        splits_path: expts/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
 
      pcqm4m_n4:
-        df_path: expts/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
-        splits_path: expts/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
+        df_path: expts/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet
+        splits_path: expts/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt`
 
     featurization_n_jobs: 4
# 30 processed_graph_data_path: "../datacache/neurips2023-small/" diff --git a/expts/neurips2023_configs/config_luis_jama.yaml b/expts/neurips2023_configs/config_luis_jama.yaml index 5135c5cae..22a6f9245 100644 --- a/expts/neurips2023_configs/config_luis_jama.yaml +++ b/expts/neurips2023_configs/config_luis_jama.yaml @@ -57,7 +57,7 @@ datamodule: pcqm20k_g13: df: null df_path: graphium/data/neurips2023/dummy-dataset/PCQM20k_G13_N4.parquet - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Dummy-dataset/PCQM20k_G13_N4.parquet + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Dummy-dataset/PCQM20k_G13_N4.parquet # or set path as the URL directly smiles_col: "ordered_smiles" label_cols: graph_* # graph_* means all columns starting with "graph_" @@ -71,7 +71,7 @@ datamodule: pcqm20k_n4: df: null df_path: graphium/data/neurips2023/dummy-dataset/PCQM20k_G13_N4.parquet - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Dummy-dataset/PCQM20k_G13_N4.parquet + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Dummy-dataset/PCQM20k_G13_N4.parquet # or set path as the URL directly smiles_col: "ordered_smiles" label_cols: node_* # node_* means all columns starting with "node_" diff --git a/expts/neurips2023_configs/config_small_gcn_gpu.yaml b/expts/neurips2023_configs/config_small_gcn_gpu.yaml index 8b5a46e26..57f859921 100644 --- a/expts/neurips2023_configs/config_small_gcn_gpu.yaml +++ b/expts/neurips2023_configs/config_small_gcn_gpu.yaml @@ -32,15 +32,15 @@ datamodule: task_specific_args: # To be replaced by a new class "DatasetParams" qm9: df_path: expts/data/neurips2023/small-dataset/qm9.csv.gz - splits_path: expts/data/neurips2023/small-dataset/qm9_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/qm9_random_splits.pt` + splits_path: expts/data/neurips2023/small-dataset/qm9_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/qm9_random_splits.pt` tox21: df_path: expts/data/neurips2023/small-dataset/Tox21-7k-12-labels.csv.gz - splits_path: expts/data/neurips2023/small-dataset/Tox21_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/Tox21_random_splits.pt` + splits_path: expts/data/neurips2023/small-dataset/Tox21_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/Tox21_random_splits.pt` zinc: df_path: expts/data/neurips2023/small-dataset/ZINC12k.csv.gz - splits_path: expts/data/neurips2023/small-dataset/ZINC12k_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/ZINC12k_random_splits.pt` + splits_path: expts/data/neurips2023/small-dataset/ZINC12k_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/ZINC12k_random_splits.pt` featurization_n_jobs: 4 # 30 processed_graph_data_path: "../datacache/neurips2023-small/" num_workers: 4 # 30 diff --git a/expts/neurips2023_configs/single_task_gcn/config_large_gcn_mcf7.yaml b/expts/neurips2023_configs/single_task_gcn/config_large_gcn_mcf7.yaml index e05d1be8d..cf6b1cf85 100644 --- a/expts/neurips2023_configs/single_task_gcn/config_large_gcn_mcf7.yaml +++ b/expts/neurips2023_configs/single_task_gcn/config_large_gcn_mcf7.yaml @@ -56,13 +56,13 @@ datamodule: l1000_mcf7: 
df: null df_path: graphium/data/neurips2023/large-dataset/LINCS_L1000_MCF7_0-4.csv.gz - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz # or set path as the URL directly smiles_col: "SMILES" label_cols: geneID-* # geneID-* means all columns starting with "geneID-" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: graphium/data/neurips2023/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt` + splits_path: graphium/data/neurips2023/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt` # Featurization prepare_dict_or_graph: pyg:graph diff --git a/expts/neurips2023_configs/single_task_gcn/config_large_gcn_pcba.yaml b/expts/neurips2023_configs/single_task_gcn/config_large_gcn_pcba.yaml index cf924850e..ece6fa4b4 100644 --- a/expts/neurips2023_configs/single_task_gcn/config_large_gcn_pcba.yaml +++ b/expts/neurips2023_configs/single_task_gcn/config_large_gcn_pcba.yaml @@ -56,13 +56,13 @@ datamodule: pcba_1328: df: null df_path: graphium/data/neurips2023/large-dataset/PCBA_1328_1564k.parquet - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet # or set path as the URL directly smiles_col: "SMILES" label_cols: assayID-* # assayID-* means all columns starting with "assayID-" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: graphium/data/neurips2023/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt` + splits_path: graphium/data/neurips2023/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt` # Featurization prepare_dict_or_graph: pyg:graph diff --git a/expts/neurips2023_configs/single_task_gcn/config_large_gcn_vcap.yaml b/expts/neurips2023_configs/single_task_gcn/config_large_gcn_vcap.yaml index f1c9bcfd4..c04ee680c 100644 --- a/expts/neurips2023_configs/single_task_gcn/config_large_gcn_vcap.yaml +++ b/expts/neurips2023_configs/single_task_gcn/config_large_gcn_vcap.yaml @@ -56,13 +56,13 @@ datamodule: l1000_vcap: df: null df_path: graphium/data/neurips2023/large-dataset/LINCS_L1000_VCAP_0-4.csv.gz - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz # or set path as the URL directly smiles_col: "SMILES" label_cols: geneID-* # geneID-* means all columns starting with "geneID-" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: graphium/data/neurips2023/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt` + splits_path: graphium/data/neurips2023/large-dataset/l1000_vcap_random_splits.pt # Download 
with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt` # Featurization prepare_dict_or_graph: pyg:graph diff --git a/expts/neurips2023_configs/single_task_gin/config_large_gin_g25.yaml b/expts/neurips2023_configs/single_task_gin/config_large_gin_g25.yaml index 01988e527..5679249d4 100644 --- a/expts/neurips2023_configs/single_task_gin/config_large_gin_g25.yaml +++ b/expts/neurips2023_configs/single_task_gin/config_large_gin_g25.yaml @@ -56,13 +56,13 @@ datamodule: pcqm4m_g25: df: null df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet # or set path as the URL directly smiles_col: "ordered_smiles" label_cols: graph_* # graph_* means all columns starting with "graph_" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` label_normalization: normalize_val_test: True method: "normal" diff --git a/expts/neurips2023_configs/single_task_gin/config_large_gin_mcf7.yaml b/expts/neurips2023_configs/single_task_gin/config_large_gin_mcf7.yaml index fdeb4b399..08ed6d515 100644 --- a/expts/neurips2023_configs/single_task_gin/config_large_gin_mcf7.yaml +++ b/expts/neurips2023_configs/single_task_gin/config_large_gin_mcf7.yaml @@ -56,13 +56,13 @@ datamodule: l1000_mcf7: df: null df_path: graphium/data/neurips2023/large-dataset/LINCS_L1000_MCF7_0-4.csv.gz - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz # or set path as the URL directly smiles_col: "SMILES" label_cols: geneID-* # geneID-* means all columns starting with "geneID-" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: graphium/data/neurips2023/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt` + splits_path: graphium/data/neurips2023/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt` # Featurization prepare_dict_or_graph: pyg:graph diff --git a/expts/neurips2023_configs/single_task_gin/config_large_gin_n4.yaml b/expts/neurips2023_configs/single_task_gin/config_large_gin_n4.yaml index 5920a80f6..971357f7d 100644 --- a/expts/neurips2023_configs/single_task_gin/config_large_gin_n4.yaml +++ b/expts/neurips2023_configs/single_task_gin/config_large_gin_n4.yaml @@ -56,13 +56,13 @@ datamodule: pcqm4m_n4: df: null df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet + # wget 
https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet # or set path as the URL directly smiles_col: "ordered_smiles" label_cols: node_* # node_* means all columns starting with "node_" # sample_size: 2000 # use sample_size for test task_level: node - splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` seed: *seed label_normalization: normalize_val_test: True diff --git a/expts/neurips2023_configs/single_task_gin/config_large_gin_pcba.yaml b/expts/neurips2023_configs/single_task_gin/config_large_gin_pcba.yaml index de2f7fbc4..d3e711166 100644 --- a/expts/neurips2023_configs/single_task_gin/config_large_gin_pcba.yaml +++ b/expts/neurips2023_configs/single_task_gin/config_large_gin_pcba.yaml @@ -56,13 +56,13 @@ datamodule: pcba_1328: df: null df_path: graphium/data/neurips2023/large-dataset/PCBA_1328_1564k.parquet - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet # or set path as the URL directly smiles_col: "SMILES" label_cols: assayID-* # assayID-* means all columns starting with "assayID-" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: graphium/data/neurips2023/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt` + splits_path: graphium/data/neurips2023/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt` # Featurization prepare_dict_or_graph: pyg:graph diff --git a/expts/neurips2023_configs/single_task_gin/config_large_gin_pcq.yaml b/expts/neurips2023_configs/single_task_gin/config_large_gin_pcq.yaml index ca820e86b..fbe7533fd 100644 --- a/expts/neurips2023_configs/single_task_gin/config_large_gin_pcq.yaml +++ b/expts/neurips2023_configs/single_task_gin/config_large_gin_pcq.yaml @@ -56,13 +56,13 @@ datamodule: pcqm4m_g25: df: null df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet # or set path as the URL directly smiles_col: "ordered_smiles" label_cols: graph_* # graph_* means all columns starting with "graph_" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` label_normalization: normalize_val_test: True method: "normal" @@ -70,13 +70,13 @@ 
datamodule: pcqm4m_n4: df: null df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet # or set path as the URL directly smiles_col: "ordered_smiles" label_cols: node_* # node_* means all columns starting with "node_" # sample_size: 2000 # use sample_size for test task_level: node - splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` seed: *seed label_normalization: normalize_val_test: True diff --git a/expts/neurips2023_configs/single_task_gin/config_large_gin_vcap.yaml b/expts/neurips2023_configs/single_task_gin/config_large_gin_vcap.yaml index c21b765b3..0647adcef 100644 --- a/expts/neurips2023_configs/single_task_gin/config_large_gin_vcap.yaml +++ b/expts/neurips2023_configs/single_task_gin/config_large_gin_vcap.yaml @@ -56,13 +56,13 @@ datamodule: l1000_vcap: df: null df_path: graphium/data/neurips2023/large-dataset/LINCS_L1000_VCAP_0-4.csv.gz - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz # or set path as the URL directly smiles_col: "SMILES" label_cols: geneID-* # geneID-* means all columns starting with "geneID-" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: graphium/data/neurips2023/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt` + splits_path: graphium/data/neurips2023/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt` # Featurization prepare_dict_or_graph: pyg:graph diff --git a/expts/neurips2023_configs/single_task_gine/config_large_gine_g25.yaml b/expts/neurips2023_configs/single_task_gine/config_large_gine_g25.yaml index b88314797..6edf29378 100644 --- a/expts/neurips2023_configs/single_task_gine/config_large_gine_g25.yaml +++ b/expts/neurips2023_configs/single_task_gine/config_large_gine_g25.yaml @@ -56,13 +56,13 @@ datamodule: pcqm4m_g25: df: null df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet # or set path as the URL directly smiles_col: "ordered_smiles" label_cols: graph_* # graph_* means all columns starting with "graph_" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + splits_path: 
graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` label_normalization: normalize_val_test: True method: "normal" diff --git a/expts/neurips2023_configs/single_task_gine/config_large_gine_mcf7.yaml b/expts/neurips2023_configs/single_task_gine/config_large_gine_mcf7.yaml index b96fc8daf..1f3cafde5 100644 --- a/expts/neurips2023_configs/single_task_gine/config_large_gine_mcf7.yaml +++ b/expts/neurips2023_configs/single_task_gine/config_large_gine_mcf7.yaml @@ -56,13 +56,13 @@ datamodule: l1000_mcf7: df: null df_path: graphium/data/neurips2023/large-dataset/LINCS_L1000_MCF7_0-4.csv.gz - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/LINCS_L1000_MCF7_0-4.csv.gz # or set path as the URL directly smiles_col: "SMILES" label_cols: geneID-* # geneID-* means all columns starting with "geneID-" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: graphium/data/neurips2023/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt` + splits_path: graphium/data/neurips2023/large-dataset/l1000_mcf7_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_mcf7_random_splits.pt` # Featurization prepare_dict_or_graph: pyg:graph diff --git a/expts/neurips2023_configs/single_task_gine/config_large_gine_n4.yaml b/expts/neurips2023_configs/single_task_gine/config_large_gine_n4.yaml index e98ae03da..9a08a504b 100644 --- a/expts/neurips2023_configs/single_task_gine/config_large_gine_n4.yaml +++ b/expts/neurips2023_configs/single_task_gine/config_large_gine_n4.yaml @@ -56,13 +56,13 @@ datamodule: pcqm4m_n4: df: null df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet # or set path as the URL directly smiles_col: "ordered_smiles" label_cols: node_* # node_* means all columns starting with "node_" # sample_size: 2000 # use sample_size for test task_level: node - splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` seed: *seed label_normalization: normalize_val_test: True diff --git a/expts/neurips2023_configs/single_task_gine/config_large_gine_pcba.yaml b/expts/neurips2023_configs/single_task_gine/config_large_gine_pcba.yaml index 427f7ca0f..4b05271ec 100644 --- a/expts/neurips2023_configs/single_task_gine/config_large_gine_pcba.yaml +++ b/expts/neurips2023_configs/single_task_gine/config_large_gine_pcba.yaml @@ -56,13 +56,13 @@ datamodule: pcba_1328: df: null df_path: graphium/data/neurips2023/large-dataset/PCBA_1328_1564k.parquet - # wget 
https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCBA_1328_1564k.parquet # or set path as the URL directly smiles_col: "SMILES" label_cols: assayID-* # assayID-* means all columns starting with "assayID-" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: graphium/data/neurips2023/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt` + splits_path: graphium/data/neurips2023/large-dataset/pcba_1328_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcba_1328_random_splits.pt` # Featurization prepare_dict_or_graph: pyg:graph diff --git a/expts/neurips2023_configs/single_task_gine/config_large_gine_pcq.yaml b/expts/neurips2023_configs/single_task_gine/config_large_gine_pcq.yaml index 07fc6d009..b359cc2f0 100644 --- a/expts/neurips2023_configs/single_task_gine/config_large_gine_pcq.yaml +++ b/expts/neurips2023_configs/single_task_gine/config_large_gine_pcq.yaml @@ -56,13 +56,13 @@ datamodule: pcqm4m_g25: df: null df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet # or set path as the URL directly smiles_col: "ordered_smiles" label_cols: graph_* # graph_* means all columns starting with "graph_" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` label_normalization: normalize_val_test: True method: "normal" @@ -70,13 +70,13 @@ datamodule: pcqm4m_n4: df: null df_path: graphium/data/neurips2023/large-dataset/PCQM4M_G25_N4.parquet - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/PCQM4M_G25_N4.parquet # or set path as the URL directly smiles_col: "ordered_smiles" label_cols: node_* # node_* means all columns starting with "node_" # sample_size: 2000 # use sample_size for test task_level: node - splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` + splits_path: graphium/data/neurips2023/large-dataset/pcqm4m_g25_n4_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/pcqm4m_g25_n4_random_splits.pt` seed: *seed label_normalization: normalize_val_test: True diff --git a/expts/neurips2023_configs/single_task_gine/config_large_gine_vcap.yaml b/expts/neurips2023_configs/single_task_gine/config_large_gine_vcap.yaml index b63263b3d..4bb034252 100644 --- 
a/expts/neurips2023_configs/single_task_gine/config_large_gine_vcap.yaml +++ b/expts/neurips2023_configs/single_task_gine/config_large_gine_vcap.yaml @@ -56,13 +56,13 @@ datamodule: l1000_vcap: df: null df_path: graphium/data/neurips2023/large-dataset/LINCS_L1000_VCAP_0-4.csv.gz - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/LINCS_L1000_VCAP_0-4.csv.gz # or set path as the URL directly smiles_col: "SMILES" label_cols: geneID-* # geneID-* means all columns starting with "geneID-" # sample_size: 2000 # use sample_size for test task_level: graph - splits_path: graphium/data/neurips2023/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt` + splits_path: graphium/data/neurips2023/large-dataset/l1000_vcap_random_splits.pt # Download with `wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Large-dataset/l1000_vcap_random_splits.pt` # Featurization prepare_dict_or_graph: pyg:graph diff --git a/graphium/data/utils.py b/graphium/data/utils.py index aa5151a90..9764d29d6 100644 --- a/graphium/data/utils.py +++ b/graphium/data/utils.py @@ -27,7 +27,7 @@ from torch_geometric.data import Data from graphium.features.featurizer import GraphDict -GRAPHIUM_DATASETS_BASE_URL = "gs://graphium-public/datasets" +GRAPHIUM_DATASETS_BASE_URL = "https://storage.valencelabs.com/graphium/datasets" GRAPHIUM_DATASETS = { "graphium-zinc-micro": "zinc-micro.zip", "graphium-zinc-bench-gnn": "zinc-bench-gnn.zip", diff --git a/install_ipu.sh b/install_ipu.sh index a21022bdb..4f6d3ecc8 100755 --- a/install_ipu.sh +++ b/install_ipu.sh @@ -97,7 +97,7 @@ echo "Downloading the sub-datasets consisting on the ToyMix dataset" toymix_dir=expts/data/neurips2023/small-dataset/ mkdir -p $toymix_dir -base_url="https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/" +base_url="https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/" files=("ZINC12k.csv.gz" "Tox21-7k-12-labels.csv.gz" "qm9.csv.gz" "qm9_random_splits.pt" "Tox21_random_splits.pt" "ZINC12k_random_splits.pt") for file in "${files[@]}"; do diff --git a/notebooks/dev-pretrained.ipynb b/notebooks/dev-pretrained.ipynb index bdaaba6fa..0c41d03e0 100644 --- a/notebooks/dev-pretrained.ipynb +++ b/notebooks/dev-pretrained.ipynb @@ -312,7 +312,7 @@ ], "source": [ "# Load a pretrained model\n", - "model_path = \"s3://graphium-public/pretrained-models/ZINC-micro-dummy-test.ckpt\"\n", + "model_path = \"https://storage.valencelabs.com/graphium/pretrained-models/ZINC-micro-dummy-test.ckpt\"\n", "# model_path = \"/home/hadim/Drive/Data/graphium/test-training/default/version_0/checkpoints/model.ckpt\"\n", "predictor = graphium.trainer.predictor.PredictorModule.load_from_checkpoint(model_path)\n", "\n", diff --git a/notebooks/running-fingerprints-from-pretrained-model.ipynb b/notebooks/running-fingerprints-from-pretrained-model.ipynb index ffa9b6f12..1f3c3cbb2 100644 --- a/notebooks/running-fingerprints-from-pretrained-model.ipynb +++ b/notebooks/running-fingerprints-from-pretrained-model.ipynb @@ -55,12 +55,12 @@ "execution_count": 2, "source": [ "# Path containing the model and its configurations\r\n", - "MODEL_PATH = \"gs://graphium-public/pretrained-models/graphium-zinc-micro-dummy-test\"\r\n", + "MODEL_PATH = 
\"https://storage.valencelabs.com/graphium/pretrained-models/graphium-zinc-micro-dummy-test\"\r\n", "MODEL_FILE = f\"{MODEL_PATH}/model.ckpt\"\r\n", "CONFIG_FILE = f\"{MODEL_PATH}/configs.yaml\"\r\n", "\r\n", "# Path containing the SMILES data to infer\r\n", - "SMILES_DF_PATH = f\"gs://graphium-public/datasets/graphium-zinc-bench-gnn/smiles_score.csv.gz\"\r\n", + "SMILES_DF_PATH = f\"https://storage.valencelabs.com/graphium/datasets/graphium-zinc-bench-gnn/smiles_score.csv.gz\"\r\n", "SMILES_COL = \"SMILES\"\r\n", "\r\n", "# Number of layers to drop when inferring the fingerprints\r\n", diff --git a/profiling/configs_profiling.yaml b/profiling/configs_profiling.yaml index 0ff4f6c94..04797edf8 100644 --- a/profiling/configs_profiling.yaml +++ b/profiling/configs_profiling.yaml @@ -5,7 +5,7 @@ constants: datamodule: module_type: "DGLFromSmilesDataModule" args: - df_path: https://storage.googleapis.com/graphium-public/datasets/graphium-zinc-bench-gnn/smiles_score.csv.gz + df_path: https://storage.valencelabs.com/graphium/datasets/graphium-zinc-bench-gnn/smiles_score.csv.gz processed_graph_data_path: null label_cols: ['score'] smiles_col: SMILES @@ -32,7 +32,7 @@ datamodule: split_val: null split_test: null split_seed: *seed - splits_path: https://storage.googleapis.com/graphium-public/datasets/graphium-zinc-bench-gnn/indexes_train_val_test.csv.gz + splits_path: https://storage.valencelabs.com/graphium/datasets/graphium-zinc-bench-gnn/indexes_train_val_test.csv.gz batch_size_training: 128 batch_size_inference: 256 diff --git a/profiling/profile_predictor.py b/profiling/profile_predictor.py index 54ee28d30..44cf83a08 100644 --- a/profiling/profile_predictor.py +++ b/profiling/profile_predictor.py @@ -29,7 +29,7 @@ def main(): CONFIG_PATH = "expts/config_micro-PCBA.yaml" - # DATA_PATH = "https://storage.googleapis.com/graphium-public/datasets/graphium-zinc-bench-gnn/smiles_score.csv.gz" + # DATA_PATH = "https://storage.valencelabs.com/graphium/datasets/graphium-zinc-bench-gnn/smiles_score.csv.gz" with fsspec.open(CONFIG_PATH, "r") as f: cfg = yaml.safe_load(f) diff --git a/tests/config_test_ipu_dataloader.yaml b/tests/config_test_ipu_dataloader.yaml index f0f55d197..a325ae974 100644 --- a/tests/config_test_ipu_dataloader.yaml +++ b/tests/config_test_ipu_dataloader.yaml @@ -34,7 +34,7 @@ datamodule: task_specific_args: # To be replaced by a new class "DatasetParams" homo: df: null - df_path: &df_path https://storage.googleapis.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2-2k-lumo-alpha.csv + df_path: &df_path https://storage.valencelabs.com/datasets-public-research/PCQM4M/cxsmiles/pcqm4mv2-2k-lumo-alpha.csv smiles_col: "cxsmiles" label_cols: ["homo_lumo_gap", "lumo"] split_val: 0.2 diff --git a/tests/config_test_ipu_dataloader_multitask.yaml b/tests/config_test_ipu_dataloader_multitask.yaml index 8b8fbf417..1687379c0 100644 --- a/tests/config_test_ipu_dataloader_multitask.yaml +++ b/tests/config_test_ipu_dataloader_multitask.yaml @@ -57,7 +57,7 @@ datamodule: qm9: df: null df_path: qm9.csv.gz - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/qm9.csv.gz + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/qm9.csv.gz # or set path as the URL directly smiles_col: "smiles" label_cols: ["A", "B", "C", "mu", "alpha", "homo", "lumo", "gap", "r2", "zpve", "u0", "u298", "h298", "g298", "cv", "u0_atom", "u298_atom", "h298_atom", "g298_atom"] @@ -71,7 +71,7 @@ datamodule: tox21: df: null df_path: Tox21-7k-12-labels.csv.gz - # 
wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/Tox21-7k-12-labels.csv.gz + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/Tox21-7k-12-labels.csv.gz # or set path as the URL directly smiles_col: "smiles" label_cols: ["NR-AR", "NR-AR-LBD", "NR-AhR", "NR-Aromatase", "NR-ER", "NR-ER-LBD", "NR-PPAR-gamma", "SR-ARE", "SR-ATAD5", "SR-HSE", "SR-MMP", "SR-p53"] @@ -83,7 +83,7 @@ datamodule: df: null df_path: ZINC12k.csv.gz # df_path: data/neurips2023/small-dataset/ZINC12k.csv.gz - # wget https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/ZINC12k.csv.gz + # wget https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/ZINC12k.csv.gz # or set path as the URL directly smiles_col: "smiles" label_cols: ["SA", "logp", "score"] diff --git a/tests/test_training.py b/tests/test_training.py index 3ac31fc35..2d98c7cd5 100644 --- a/tests/test_training.py +++ b/tests/test_training.py @@ -29,7 +29,7 @@ def setup_class(cls): toymix_dir = "expts/data/neurips2023/small-dataset/" subprocess.run(["mkdir", "-p", toymix_dir]) - base_url = "https://storage.googleapis.com/graphium-public/datasets/neurips_2023/Small-dataset/" + base_url = "https://storage.valencelabs.com/graphium/datasets/neurips_2023/Small-dataset/" files = [ "ZINC12k.csv.gz", "Tox21-7k-12-labels.csv.gz", From 4621a5db10b91310f54770af0493037319fd6216 Mon Sep 17 00:00:00 2001 From: Julien St-Laurent Date: Wed, 30 Apr 2025 13:37:46 -0400 Subject: [PATCH 2/2] chore: Formatting --- graphium/cli/fingerprints.py | 3 +-- graphium/config/_load.py | 1 - graphium/config/_loader.py | 1 - graphium/config/config_convert.py | 1 - graphium/data/collate.py | 1 - graphium/data/datamodule.py | 1 - graphium/data/dataset.py | 1 - graphium/data/multilevel_utils.py | 1 - graphium/data/normalization.py | 1 - graphium/data/sampler.py | 1 - graphium/data/smiles_transform.py | 1 - graphium/data/utils.py | 1 - graphium/features/commute.py | 1 - graphium/features/electrostatic.py | 1 - graphium/features/featurizer.py | 1 - graphium/features/graphormer.py | 1 - graphium/features/nmp.py | 1 - graphium/features/positional_encoding.py | 1 - graphium/features/properties.py | 1 - graphium/features/rw.py | 1 - graphium/features/spectral.py | 1 - graphium/features/transfer_pos_level.py | 1 - graphium/finetuning/finetuning.py | 1 - graphium/finetuning/finetuning_architecture.py | 1 - graphium/finetuning/fingerprinting.py | 1 - graphium/finetuning/utils.py | 1 - graphium/hyper_param_search/results.py | 1 - graphium/ipu/ipu_dataloader.py | 1 - graphium/ipu/ipu_losses.py | 1 - graphium/ipu/ipu_metrics.py | 1 - graphium/ipu/ipu_simple_lightning.py | 1 - graphium/ipu/ipu_utils.py | 1 - graphium/ipu/ipu_wrapper.py | 1 - graphium/ipu/to_dense_batch.py | 1 - graphium/nn/architectures/encoder_manager.py | 1 - graphium/nn/architectures/global_architectures.py | 5 +---- graphium/nn/architectures/pyg_architectures.py | 1 - graphium/nn/base_graph_layer.py | 1 - graphium/nn/base_layers.py | 1 - graphium/nn/encoders/signnet_pos_encoder.py | 1 + graphium/nn/ensemble_layers.py | 1 - graphium/nn/pyg_layers/dimenet_pyg.py | 1 - graphium/nn/pyg_layers/gated_gcn_pyg.py | 1 - graphium/nn/pyg_layers/gcn_pyg.py | 1 - graphium/nn/pyg_layers/gin_pyg.py | 1 - graphium/nn/pyg_layers/gps_pyg.py | 1 - graphium/nn/pyg_layers/mpnn_pyg.py | 1 - graphium/nn/pyg_layers/pna_pyg.py | 1 - graphium/nn/pyg_layers/pooling_pyg.py | 1 - graphium/nn/pyg_layers/utils.py | 1 - 
graphium/nn/residual_connections.py | 1 - graphium/nn/utils.py | 1 - graphium/trainer/losses.py | 1 - graphium/trainer/metrics.py | 1 - graphium/trainer/predictor.py | 7 +++---- graphium/trainer/predictor_options.py | 5 ++++- graphium/trainer/predictor_summaries.py | 7 +++---- graphium/utils/arg_checker.py | 1 - graphium/utils/command_line_utils.py | 1 - graphium/utils/custom_lr.py | 1 - graphium/utils/fs.py | 1 - graphium/utils/hashing.py | 1 - graphium/utils/moving_average_tracker.py | 1 - graphium/utils/mup.py | 1 - graphium/utils/packing.py | 1 - graphium/utils/safe_run.py | 1 - graphium/utils/spaces.py | 1 - graphium/utils/tensor.py | 3 +-- profiling/profile_mol_to_graph.py | 1 - profiling/profile_one_of_k_encoding.py | 1 - profiling/profile_predictor.py | 1 - scripts/convert_yml.py | 1 - tests/conftest.py | 1 - tests/test_architectures.py | 9 +++++---- tests/test_attention.py | 1 - tests/test_base_layers.py | 1 - tests/test_collate.py | 1 - tests/test_data_utils.py | 1 - tests/test_datamodule.py | 1 - tests/test_dataset.py | 1 - tests/test_ensemble_layers.py | 1 - tests/test_featurizer.py | 1 - tests/test_finetuning.py | 1 - tests/test_ipu_dataloader.py | 1 - tests/test_ipu_losses.py | 1 - tests/test_ipu_metrics.py | 1 - tests/test_ipu_options.py | 1 - tests/test_ipu_poptorch.py | 1 - tests/test_ipu_to_dense_batch.py | 1 - tests/test_loaders.py | 1 - tests/test_losses.py | 1 - tests/test_metrics.py | 1 - tests/test_mtl_architecture.py | 1 - tests/test_multitask_datamodule.py | 1 - tests/test_mup.py | 1 - tests/test_packing.py | 1 - tests/test_pe_nodepair.py | 1 - tests/test_pe_rw.py | 1 - tests/test_pe_spectral.py | 1 - tests/test_pos_transfer_funcs.py | 1 - tests/test_positional_encoders.py | 1 - tests/test_positional_encodings.py | 1 - tests/test_predictor.py | 1 - tests/test_pyg_layers.py | 1 - tests/test_residual_connections.py | 1 - tests/test_training.py | 1 - tests/test_utils.py | 1 - 107 files changed, 19 insertions(+), 120 deletions(-) diff --git a/graphium/cli/fingerprints.py b/graphium/cli/fingerprints.py index 62b078eb9..cbaeb2fd4 100644 --- a/graphium/cli/fingerprints.py +++ b/graphium/cli/fingerprints.py @@ -2,5 +2,4 @@ @app.command(name="fp") -def get_fingerprints_from_model(): - ... +def get_fingerprints_from_model(): ... 
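A note on the formatting churn in this second patch: the tool is not named anywhere in the series, but the hunks below are consistent with a newer Black release (presumably 24.x stable style). Three rules account for essentially all of the changes: stub bodies consisting only of `...` are collapsed onto the `def` line (as in fingerprints.py above), exactly one blank line is enforced after a module docstring (extra blank lines removed in most files, one added in signnet_pos_encoder.py where it was missing), and long conditional expressions are wrapped in parentheses when they must split (as in predictor.py below). A minimal sketch of all three rules on a hypothetical module — illustration only, not part of the patch:

    """Example module docstring."""

    from typing import Optional  # rule 2: exactly one blank line above this import


    def get_fingerprints_from_model(): ...  # rule 1: `...` stub collapsed onto the def line


    def pick(mapping: Optional[dict], key: str, fallback: Optional[str]) -> Optional[str]:
        # rule 3: a conditional expression that has to split gets wrapped in parentheses
        return (
            mapping.get(key, fallback) if mapping is not None else fallback
        )
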
diff --git a/graphium/config/_load.py b/graphium/config/_load.py index dfbf1ca2d..2092da0b4 100644 --- a/graphium/config/_load.py +++ b/graphium/config/_load.py @@ -10,7 +10,6 @@ -------------------------------------------------------------------------------- """ - import importlib.resources import omegaconf diff --git a/graphium/config/_loader.py b/graphium/config/_loader.py index 1e542592d..cceb3436b 100644 --- a/graphium/config/_loader.py +++ b/graphium/config/_loader.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - # Misc import os from copy import deepcopy diff --git a/graphium/config/config_convert.py b/graphium/config/config_convert.py index 135e63dec..1d032a1bb 100644 --- a/graphium/config/config_convert.py +++ b/graphium/config/config_convert.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - import omegaconf diff --git a/graphium/data/collate.py b/graphium/data/collate.py index 22486b034..aaa34a5db 100644 --- a/graphium/data/collate.py +++ b/graphium/data/collate.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from collections.abc import Mapping, Sequence # from pprint import pprint diff --git a/graphium/data/datamodule.py b/graphium/data/datamodule.py index 4e89f6728..dd6353a76 100644 --- a/graphium/data/datamodule.py +++ b/graphium/data/datamodule.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - import tempfile from contextlib import redirect_stderr, redirect_stdout from typing import Type, List, Dict, Union, Any, Callable, Optional, Tuple, Iterable, Literal diff --git a/graphium/data/dataset.py b/graphium/data/dataset.py index 34c1b30aa..3830edf69 100644 --- a/graphium/data/dataset.py +++ b/graphium/data/dataset.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - import os from copy import deepcopy from functools import lru_cache diff --git a/graphium/data/multilevel_utils.py b/graphium/data/multilevel_utils.py index 7f9ed5813..dd422c6fb 100644 --- a/graphium/data/multilevel_utils.py +++ b/graphium/data/multilevel_utils.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - import pandas as pd import ast import numpy as np diff --git a/graphium/data/normalization.py b/graphium/data/normalization.py index 994e8939b..450462c15 100644 --- a/graphium/data/normalization.py +++ b/graphium/data/normalization.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Optional from loguru import logger import numpy as np diff --git a/graphium/data/sampler.py b/graphium/data/sampler.py index cd325cd9c..abb08f3d9 100644 --- a/graphium/data/sampler.py +++ b/graphium/data/sampler.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Dict, Optional from torch.utils.data.dataloader import Dataset diff --git a/graphium/data/smiles_transform.py b/graphium/data/smiles_transform.py index d6f22fbdc..1cbbb9956 100644 --- a/graphium/data/smiles_transform.py +++ b/graphium/data/smiles_transform.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Type, List, Dict, Union, Any, Callable, Optional, Tuple, Iterable import os diff --git 
a/graphium/data/utils.py b/graphium/data/utils.py index 9764d29d6..42b43c11b 100644 --- a/graphium/data/utils.py +++ b/graphium/data/utils.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Union, List, Callable, Dict, Tuple, Any, Optional import importlib.resources diff --git a/graphium/features/commute.py b/graphium/features/commute.py index a7cea768c..7b21b69ae 100644 --- a/graphium/features/commute.py +++ b/graphium/features/commute.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Tuple, Union, Dict, Any import numpy as np diff --git a/graphium/features/electrostatic.py b/graphium/features/electrostatic.py index 58dc115f7..fe94fcc10 100644 --- a/graphium/features/electrostatic.py +++ b/graphium/features/electrostatic.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Tuple, Union, Dict, Any import numpy as np diff --git a/graphium/features/featurizer.py b/graphium/features/featurizer.py index 8d8e18159..9218abae5 100644 --- a/graphium/features/featurizer.py +++ b/graphium/features/featurizer.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Union, List, Callable, Dict, Tuple, Any, Optional import inspect diff --git a/graphium/features/graphormer.py b/graphium/features/graphormer.py index d62010801..6431a60e9 100644 --- a/graphium/features/graphormer.py +++ b/graphium/features/graphormer.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Tuple, Union, Dict, Any import numpy as np diff --git a/graphium/features/nmp.py b/graphium/features/nmp.py index 97cee7019..4c65f6cb7 100644 --- a/graphium/features/nmp.py +++ b/graphium/features/nmp.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Tuple, Optional, Dict, Union import importlib.resources from copy import deepcopy diff --git a/graphium/features/positional_encoding.py b/graphium/features/positional_encoding.py index 8acc231d8..55c7c38b9 100644 --- a/graphium/features/positional_encoding.py +++ b/graphium/features/positional_encoding.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Tuple, Union, Optional, Dict, Any, OrderedDict from copy import deepcopy import numpy as np diff --git a/graphium/features/properties.py b/graphium/features/properties.py index 89a90ffee..1627368c5 100644 --- a/graphium/features/properties.py +++ b/graphium/features/properties.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Union, List, Callable import numpy as np diff --git a/graphium/features/rw.py b/graphium/features/rw.py index c7eada2ba..4a798915e 100644 --- a/graphium/features/rw.py +++ b/graphium/features/rw.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Tuple, Union, Optional, List, Dict, Any, Iterable from scipy.sparse import issparse, spmatrix, coo_matrix diff --git a/graphium/features/spectral.py b/graphium/features/spectral.py index 55d8527a4..c4a448b8f 100644 --- a/graphium/features/spectral.py +++ b/graphium/features/spectral.py @@ -11,7 +11,6 @@ 
-------------------------------------------------------------------------------- """ - from typing import Tuple, Union, Dict, Any from scipy.linalg import eig from scipy.sparse import csr_matrix, diags, issparse, spmatrix diff --git a/graphium/features/transfer_pos_level.py b/graphium/features/transfer_pos_level.py index 4bb70e160..bc973d937 100644 --- a/graphium/features/transfer_pos_level.py +++ b/graphium/features/transfer_pos_level.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Tuple, Union, List, Dict, Any, Optional import numpy as np diff --git a/graphium/finetuning/finetuning.py b/graphium/finetuning/finetuning.py index 97d6d7fc7..c4d2d8b35 100644 --- a/graphium/finetuning/finetuning.py +++ b/graphium/finetuning/finetuning.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Iterable, List, Dict, Tuple, Union, Callable, Any, Optional, Type from collections import OrderedDict diff --git a/graphium/finetuning/finetuning_architecture.py b/graphium/finetuning/finetuning_architecture.py index 864016141..4a4c00121 100644 --- a/graphium/finetuning/finetuning_architecture.py +++ b/graphium/finetuning/finetuning_architecture.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Any, Dict, Optional, Union import torch diff --git a/graphium/finetuning/fingerprinting.py b/graphium/finetuning/fingerprinting.py index 8bfdb5d94..dd65413b8 100644 --- a/graphium/finetuning/fingerprinting.py +++ b/graphium/finetuning/fingerprinting.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - import torch from collections import defaultdict diff --git a/graphium/finetuning/utils.py b/graphium/finetuning/utils.py index 7b9f7df74..a4019f98f 100644 --- a/graphium/finetuning/utils.py +++ b/graphium/finetuning/utils.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from copy import deepcopy from typing import Any, Dict, List, Union diff --git a/graphium/hyper_param_search/results.py b/graphium/hyper_param_search/results.py index 30ab04cdc..14cceaa16 100644 --- a/graphium/hyper_param_search/results.py +++ b/graphium/hyper_param_search/results.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - _OBJECTIVE_KEY = "objective" diff --git a/graphium/ipu/ipu_dataloader.py b/graphium/ipu/ipu_dataloader.py index 5aa7828f4..593f0f981 100644 --- a/graphium/ipu/ipu_dataloader.py +++ b/graphium/ipu/ipu_dataloader.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Callable, Iterable, Optional, List, Tuple, Dict, Any, Union from copy import deepcopy from dataclasses import dataclass diff --git a/graphium/ipu/ipu_losses.py b/graphium/ipu/ipu_losses.py index 6bc434ae4..6bc6ec51a 100644 --- a/graphium/ipu/ipu_losses.py +++ b/graphium/ipu/ipu_losses.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - import torch from torch import Tensor from torch.nn import BCELoss, BCEWithLogitsLoss, MSELoss, L1Loss diff --git a/graphium/ipu/ipu_metrics.py b/graphium/ipu/ipu_metrics.py index 9029d3e00..389e20e80 100644 --- a/graphium/ipu/ipu_metrics.py +++ b/graphium/ipu/ipu_metrics.py @@ -11,7 +11,6 @@ 
-------------------------------------------------------------------------------- """ - from typing import Optional, Tuple, Sequence, Literal import torch diff --git a/graphium/ipu/ipu_simple_lightning.py b/graphium/ipu/ipu_simple_lightning.py index b2fca086e..d2051e26d 100644 --- a/graphium/ipu/ipu_simple_lightning.py +++ b/graphium/ipu/ipu_simple_lightning.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - import lightning from lightning_graphcore import IPUStrategy from lightning.pytorch.loggers import WandbLogger diff --git a/graphium/ipu/ipu_utils.py b/graphium/ipu/ipu_utils.py index c5140ecb5..1d16bfbfd 100644 --- a/graphium/ipu/ipu_utils.py +++ b/graphium/ipu/ipu_utils.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - import os import tempfile from datetime import datetime diff --git a/graphium/ipu/ipu_wrapper.py b/graphium/ipu/ipu_wrapper.py index 0ac04b883..11a8436b8 100644 --- a/graphium/ipu/ipu_wrapper.py +++ b/graphium/ipu/ipu_wrapper.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Dict, Any, Optional, Callable, Union, Type, Tuple, Iterable from torch_geometric.data import Batch diff --git a/graphium/ipu/to_dense_batch.py b/graphium/ipu/to_dense_batch.py index 9198ccf3f..4b5351eba 100644 --- a/graphium/ipu/to_dense_batch.py +++ b/graphium/ipu/to_dense_batch.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Optional, Tuple import torch diff --git a/graphium/nn/architectures/encoder_manager.py b/graphium/nn/architectures/encoder_manager.py index 69fb8866e..f1e4784a4 100644 --- a/graphium/nn/architectures/encoder_manager.py +++ b/graphium/nn/architectures/encoder_manager.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Iterable, Dict, Any, Optional from torch_geometric.data import Batch diff --git a/graphium/nn/architectures/global_architectures.py b/graphium/nn/architectures/global_architectures.py index db28adac4..a28bb4cfe 100644 --- a/graphium/nn/architectures/global_architectures.py +++ b/graphium/nn/architectures/global_architectures.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Iterable, List, Dict, Literal, Tuple, Union, Callable, Any, Optional, Type from torch_geometric.data import Batch from graphium.ipu.to_dense_batch import to_dense_batch @@ -923,9 +922,7 @@ def __init__( self.out_dim_edges = ( out_dim_edges if out_dim_edges is not None - else self.hidden_dims_edges[-1] - if self.hidden_dims_edges - else 0 + else self.hidden_dims_edges[-1] if self.hidden_dims_edges else 0 ) self.full_dims_edges = None if len(self.hidden_dims_edges) or self.out_dim_edges > 0: diff --git a/graphium/nn/architectures/pyg_architectures.py b/graphium/nn/architectures/pyg_architectures.py index 3bd4d10c1..6e9b9ff90 100644 --- a/graphium/nn/architectures/pyg_architectures.py +++ b/graphium/nn/architectures/pyg_architectures.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from torch import Tensor from torch.nn import Module from typing import Tuple, Union, List, Optional diff --git a/graphium/nn/base_graph_layer.py b/graphium/nn/base_graph_layer.py index 66869f888..98fd855be 100644 --- 
a/graphium/nn/base_graph_layer.py +++ b/graphium/nn/base_graph_layer.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - import abc from typing import Union, Callable, List, Optional, Mapping from copy import deepcopy diff --git a/graphium/nn/base_layers.py b/graphium/nn/base_layers.py index dbc89f19f..61e20f1eb 100644 --- a/graphium/nn/base_layers.py +++ b/graphium/nn/base_layers.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Union, Callable, Optional, Type, Tuple, Iterable from copy import deepcopy from loguru import logger diff --git a/graphium/nn/encoders/signnet_pos_encoder.py b/graphium/nn/encoders/signnet_pos_encoder.py index e95278cd5..1fc0a630e 100644 --- a/graphium/nn/encoders/signnet_pos_encoder.py +++ b/graphium/nn/encoders/signnet_pos_encoder.py @@ -2,6 +2,7 @@ SignNet https://arxiv.org/abs/2202.13013 based on https://github.com/cptq/SignNet-BasisNet """ + from typing import Dict, Any, Optional, List import torch diff --git a/graphium/nn/ensemble_layers.py b/graphium/nn/ensemble_layers.py index 7b6aa4668..284de4b77 100644 --- a/graphium/nn/ensemble_layers.py +++ b/graphium/nn/ensemble_layers.py @@ -10,7 +10,6 @@ -------------------------------------------------------------------------------- """ - from typing import Union, Callable, Optional, Type, Tuple, Iterable from copy import deepcopy from loguru import logger diff --git a/graphium/nn/pyg_layers/dimenet_pyg.py b/graphium/nn/pyg_layers/dimenet_pyg.py index 2fc6d3526..28326c35e 100644 --- a/graphium/nn/pyg_layers/dimenet_pyg.py +++ b/graphium/nn/pyg_layers/dimenet_pyg.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - from typing import Callable, Union, Optional, Tuple from functools import partial diff --git a/graphium/nn/pyg_layers/gated_gcn_pyg.py b/graphium/nn/pyg_layers/gated_gcn_pyg.py index d36a0ef3a..37fbdc41f 100644 --- a/graphium/nn/pyg_layers/gated_gcn_pyg.py +++ b/graphium/nn/pyg_layers/gated_gcn_pyg.py @@ -10,7 +10,6 @@ -------------------------------------------------------------------------------- """ - from typing import Union, Callable from functools import partial diff --git a/graphium/nn/pyg_layers/gcn_pyg.py b/graphium/nn/pyg_layers/gcn_pyg.py index c42520877..5f7bd899c 100644 --- a/graphium/nn/pyg_layers/gcn_pyg.py +++ b/graphium/nn/pyg_layers/gcn_pyg.py @@ -10,7 +10,6 @@ -------------------------------------------------------------------------------- """ - from typing import Callable, Union from functools import partial diff --git a/graphium/nn/pyg_layers/gin_pyg.py b/graphium/nn/pyg_layers/gin_pyg.py index afd43e965..e6077f522 100644 --- a/graphium/nn/pyg_layers/gin_pyg.py +++ b/graphium/nn/pyg_layers/gin_pyg.py @@ -10,7 +10,6 @@ -------------------------------------------------------------------------------- """ - from typing import Callable, Union, Optional from functools import partial diff --git a/graphium/nn/pyg_layers/gps_pyg.py b/graphium/nn/pyg_layers/gps_pyg.py index 3d8671c53..4e2c58a52 100644 --- a/graphium/nn/pyg_layers/gps_pyg.py +++ b/graphium/nn/pyg_layers/gps_pyg.py @@ -11,7 +11,6 @@ -------------------------------------------------------------------------------- """ - import torch from copy import deepcopy from typing import Callable, Union, Optional, Dict, Any diff --git a/graphium/nn/pyg_layers/mpnn_pyg.py b/graphium/nn/pyg_layers/mpnn_pyg.py index 995475c8c..7302ffe4c 100644 --- 
+++ b/graphium/nn/pyg_layers/mpnn_pyg.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 from typing import Callable, Optional, Union, Tuple, List
 
 import torch
diff --git a/graphium/nn/pyg_layers/pna_pyg.py b/graphium/nn/pyg_layers/pna_pyg.py
index 2ff098b36..196320398 100644
--- a/graphium/nn/pyg_layers/pna_pyg.py
+++ b/graphium/nn/pyg_layers/pna_pyg.py
@@ -10,7 +10,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 from typing import Dict, List, Optional, Union, Callable
 
 from functools import partial
diff --git a/graphium/nn/pyg_layers/pooling_pyg.py b/graphium/nn/pyg_layers/pooling_pyg.py
index 3147c19d1..8eb8e9da5 100644
--- a/graphium/nn/pyg_layers/pooling_pyg.py
+++ b/graphium/nn/pyg_layers/pooling_pyg.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 import torch
 import torch.nn as nn
 from torch import Tensor, LongTensor
diff --git a/graphium/nn/pyg_layers/utils.py b/graphium/nn/pyg_layers/utils.py
index 83dc4f737..790b5d0aa 100644
--- a/graphium/nn/pyg_layers/utils.py
+++ b/graphium/nn/pyg_layers/utils.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 import math
 import torch
 import torch.nn as nn
diff --git a/graphium/nn/residual_connections.py b/graphium/nn/residual_connections.py
index 9f2c383de..c9923a764 100644
--- a/graphium/nn/residual_connections.py
+++ b/graphium/nn/residual_connections.py
@@ -10,7 +10,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Different types of residual connections, including None, Simple (ResNet-like),
 Concat and DenseNet
diff --git a/graphium/nn/utils.py b/graphium/nn/utils.py
index 90a6e2cd9..73b297dc6 100644
--- a/graphium/nn/utils.py
+++ b/graphium/nn/utils.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 import abc
 import inspect
 from numbers import Real
diff --git a/graphium/trainer/losses.py b/graphium/trainer/losses.py
index 05603d8c2..5cd219108 100644
--- a/graphium/trainer/losses.py
+++ b/graphium/trainer/losses.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 from typing import Optional
 
 import torch
diff --git a/graphium/trainer/metrics.py b/graphium/trainer/metrics.py
index 22361faa6..41e469038 100644
--- a/graphium/trainer/metrics.py
+++ b/graphium/trainer/metrics.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 from typing import Union, Callable, Optional, Dict, Any
 
 import sys
diff --git a/graphium/trainer/predictor.py b/graphium/trainer/predictor.py
index 8cfb1ad28..8d328c8aa 100644
--- a/graphium/trainer/predictor.py
+++ b/graphium/trainer/predictor.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 import time
 from copy import deepcopy
 from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
@@ -115,9 +114,9 @@ def __init__(
                 loss_fun=loss_fun[task],
                 metrics=metrics[task],
                 metrics_on_progress_bar=metrics_on_progress_bar[task],
-                metrics_on_training_set=metrics_on_training_set[task]
-                if metrics_on_training_set is not None
-                else None,
+                metrics_on_training_set=(
+                    metrics_on_training_set[task] if metrics_on_training_set is not None else None
+                ),
             )
             eval_options[task].check_metrics_validity()
 
diff --git a/graphium/trainer/predictor_options.py b/graphium/trainer/predictor_options.py
index 04a62e84b..3a7e2436a 100644
--- a/graphium/trainer/predictor_options.py
+++ b/graphium/trainer/predictor_options.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 r"""Data classes to group together related arguments for the creation of a Predictor Module."""
 
 
@@ -46,6 +45,7 @@ class ModelOptions:
 
         model_kwargs: Key-word arguments used to initialize the model from `model_class`.
     """
+
     model_class: Type[nn.Module]
     model_kwargs: Dict[str, Any]
 
@@ -82,6 +82,7 @@ class OptimOptions:
 
         scheduler_class: The class to use for the scheduler, or the str representing the scheduler.
     """
+
     optim_kwargs: Optional[Dict[str, Any]] = None
     torch_scheduler_kwargs: Optional[Dict[str, Any]] = None
    scheduler_kwargs: Optional[Dict[str, Any]] = None
@@ -157,6 +158,7 @@ class EvalOptions:
             If `None`, all the metrics are computed. Using less metrics can significantly improve
             performance, depending on the number of readouts.
     """
+
     loss_fun: Union[str, Dict, Callable]
     metrics: Dict[str, Callable] = None
     metrics_on_progress_bar: List[str] = field(default_factory=List[str])
@@ -236,6 +238,7 @@ class FlagOptions:
 
         - alpha: A float that specifies the ascent step size when running FLAG. Default=0.01
     """
+
     flag_kwargs: Dict[str, Any] = None
 
     # Set the parameters and default values for the FLAG adversarial augmentation, and check values
diff --git a/graphium/trainer/predictor_summaries.py b/graphium/trainer/predictor_summaries.py
index 4cec79377..9c880c409 100644
--- a/graphium/trainer/predictor_summaries.py
+++ b/graphium/trainer/predictor_summaries.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 r"""Classes to store information about resulting evaluation metrics when using a Predictor Module."""
 
 from typing import Any, Callable, Dict, List, Optional, Union
@@ -285,9 +284,9 @@ def get_metrics_logs(self) -> Dict[str, Any]:
 
         # Convert all metrics to CPU, except for the loss
         # metric_logs[f"{self.loss_fun._get_name()}/{self.step_name}"] = self.loss.detach().cpu()
-        metric_logs[
-            self.metric_log_name(self.task_name, self.loss_fun._get_name(), self.step_name)
-        ] = self.loss.detach().cpu()
+        metric_logs[self.metric_log_name(self.task_name, self.loss_fun._get_name(), self.step_name)] = (
+            self.loss.detach().cpu()
+        )
 
         # print("Metrics logs keys: ", metric_logs.keys())
         metric_logs = {key: metric.detach().cpu() for key, metric in metric_logs.items()}
diff --git a/graphium/utils/arg_checker.py b/graphium/utils/arg_checker.py
index cc5837c19..96b402694 100644
--- a/graphium/utils/arg_checker.py
+++ b/graphium/utils/arg_checker.py
@@ -10,7 +10,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """ Argument checker module """
 import collections
 import numpy as np
diff --git a/graphium/utils/command_line_utils.py b/graphium/utils/command_line_utils.py
index 3e75979d3..c9020b749 100644
--- a/graphium/utils/command_line_utils.py
+++ b/graphium/utils/command_line_utils.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 import re
 from collections import defaultdict
 from typing import List, Dict
diff --git a/graphium/utils/custom_lr.py b/graphium/utils/custom_lr.py
index beed53816..80a896f12 100644
--- a/graphium/utils/custom_lr.py
+++ b/graphium/utils/custom_lr.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 import warnings
 
 from torch.optim.lr_scheduler import _LRScheduler
diff --git a/graphium/utils/fs.py b/graphium/utils/fs.py
index a5ba7c5e9..b2bb14fbb 100644
--- a/graphium/utils/fs.py
+++ b/graphium/utils/fs.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 from typing import Union
 from typing import Optional
 
diff --git a/graphium/utils/hashing.py b/graphium/utils/hashing.py
index a92ac0212..f73779d0c 100644
--- a/graphium/utils/hashing.py
+++ b/graphium/utils/hashing.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 from typing import Any
 import hashlib
 import yaml
diff --git a/graphium/utils/moving_average_tracker.py b/graphium/utils/moving_average_tracker.py
index ec05e4bc4..675959eed 100644
--- a/graphium/utils/moving_average_tracker.py
+++ b/graphium/utils/moving_average_tracker.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 from dataclasses import dataclass
 
 
diff --git a/graphium/utils/mup.py b/graphium/utils/mup.py
index f538aadc6..d7261835b 100644
--- a/graphium/utils/mup.py
+++ b/graphium/utils/mup.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 ##### Code adapted from the `mup` package from Microsoft https://github.com/microsoft/mup
 
 from torch.nn import Linear
diff --git a/graphium/utils/packing.py b/graphium/utils/packing.py
index 6db6856b1..a4d3d2e68 100644
--- a/graphium/utils/packing.py
+++ b/graphium/utils/packing.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 from typing import List, Tuple, Iterable, Optional
 import numpy as np
 import torch
diff --git a/graphium/utils/safe_run.py b/graphium/utils/safe_run.py
index 2db0af4dc..a04d1f9dd 100644
--- a/graphium/utils/safe_run.py
+++ b/graphium/utils/safe_run.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 from loguru import logger
 
 import traceback as tb
diff --git a/graphium/utils/spaces.py b/graphium/utils/spaces.py
index 88812c0be..cef2ec981 100644
--- a/graphium/utils/spaces.py
+++ b/graphium/utils/spaces.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 from copy import deepcopy
 import torch
 import torch.optim.lr_scheduler as sc
diff --git a/graphium/utils/tensor.py b/graphium/utils/tensor.py
index 87b578011..676759f36 100644
--- a/graphium/utils/tensor.py
+++ b/graphium/utils/tensor.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 import os
 import numpy as np
 import pandas as pd
@@ -415,7 +414,7 @@ def tensor_fp16_to_fp32(tensor: Tensor) -> Tensor:
 
 
 def dict_tensor_fp16_to_fp32(
-    dict_tensor: Union[Tensor, Dict[str, Tensor], Dict[str, Dict[str, Tensor]]]
+    dict_tensor: Union[Tensor, Dict[str, Tensor], Dict[str, Dict[str, Tensor]]],
 ) -> Union[Tensor, Dict[str, Tensor], Dict[str, Dict[str, Tensor]]]:
     r"""Recursively Cast a dictionary of tensors from fp16 to fp32 if it is in fp16
 
diff --git a/profiling/profile_mol_to_graph.py b/profiling/profile_mol_to_graph.py
index 423f487cf..08671f1f2 100644
--- a/profiling/profile_mol_to_graph.py
+++ b/profiling/profile_mol_to_graph.py
@@ -10,7 +10,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 from tqdm import tqdm
 import datamol as dm
 import pickle
diff --git a/profiling/profile_one_of_k_encoding.py b/profiling/profile_one_of_k_encoding.py
index 7614ac235..02866f14c 100644
--- a/profiling/profile_one_of_k_encoding.py
+++ b/profiling/profile_one_of_k_encoding.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 from tqdm import tqdm
 
 from graphium.utils.tensor import one_of_k_encoding
diff --git a/profiling/profile_predictor.py b/profiling/profile_predictor.py
index 44cf83a08..c5a36f3b0 100644
--- a/profiling/profile_predictor.py
+++ b/profiling/profile_predictor.py
@@ -10,7 +10,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 from tqdm import tqdm
 import os
 from time import time
diff --git a/scripts/convert_yml.py b/scripts/convert_yml.py
index 90cc876f4..b2b535969 100644
--- a/scripts/convert_yml.py
+++ b/scripts/convert_yml.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Convert the dependencies from conda's `env.yml` to pip `requirements.txt`
 """
diff --git a/tests/conftest.py b/tests/conftest.py
index e36e32356..6fbd15af9 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -10,7 +10,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 import pathlib
 
 import pytest
diff --git a/tests/test_architectures.py b/tests/test_architectures.py
index 6a367b531..0e0726668 100644
--- a/tests/test_architectures.py
+++ b/tests/test_architectures.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Unit tests for the different architectures of graphium/nn/architectures...
 
@@ -643,9 +642,11 @@ def test_forward_densenet_residual(self):
 
         f = gnn.layers[0].out_dim_factor
         f2 = [
-            ((ii // residual_skip_steps) + 1) * f
-            if ((ii % residual_skip_steps) == 0 and ii > 0)
-            else f
+            (
+                ((ii // residual_skip_steps) + 1) * f
+                if ((ii % residual_skip_steps) == 0 and ii > 0)
+                else f
+            )
             for ii in range(6)
         ]
         self.assertEqual(gnn.layers[0].in_dim, self.in_dim, msg=err_msg)
diff --git a/tests/test_attention.py b/tests/test_attention.py
index 28b9cd2a1..a223ebb4c 100644
--- a/tests/test_attention.py
+++ b/tests/test_attention.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Unit tests for the attention layer
 """
diff --git a/tests/test_base_layers.py b/tests/test_base_layers.py
index 2093619f2..8802aae6f 100644
--- a/tests/test_base_layers.py
+++ b/tests/test_base_layers.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Unit tests for the different layers of graphium/nn/base_layers
 """
diff --git a/tests/test_collate.py b/tests/test_collate.py
index 3cb453b32..3fc0888fc 100644
--- a/tests/test_collate.py
+++ b/tests/test_collate.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Unit tests for the collate
 """
diff --git a/tests/test_data_utils.py b/tests/test_data_utils.py
index 6b73110de..8caf8bfcf 100644
--- a/tests/test_data_utils.py
+++ b/tests/test_data_utils.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 import pandas as pd
 import unittest as ut
 import graphium
diff --git a/tests/test_datamodule.py b/tests/test_datamodule.py
index 824b80d50..d9b880b49 100644
--- a/tests/test_datamodule.py
+++ b/tests/test_datamodule.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 import unittest as ut
 import numpy as np
 import torch
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
index 4a7173244..d09384bbf 100644
--- a/tests/test_dataset.py
+++ b/tests/test_dataset.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 import unittest as ut
 
 from graphium.data import load_micro_zinc
diff --git a/tests/test_ensemble_layers.py b/tests/test_ensemble_layers.py
index 2ad3cb6ec..7685726ca 100644
--- a/tests/test_ensemble_layers.py
+++ b/tests/test_ensemble_layers.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Unit tests for the different layers of graphium/nn/ensemble_layers
 """
diff --git a/tests/test_featurizer.py b/tests/test_featurizer.py
index e8f666365..b9814e88a 100644
--- a/tests/test_featurizer.py
+++ b/tests/test_featurizer.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Unit tests for the different datasets of graphium/features/featurizer.py
 """
diff --git a/tests/test_finetuning.py b/tests/test_finetuning.py
index 52484c4c9..8a99c465e 100644
--- a/tests/test_finetuning.py
+++ b/tests/test_finetuning.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 import os
 import unittest as ut
 from copy import deepcopy
diff --git a/tests/test_ipu_dataloader.py b/tests/test_ipu_dataloader.py
index 436d609d4..40cd3d773 100644
--- a/tests/test_ipu_dataloader.py
+++ b/tests/test_ipu_dataloader.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 # General imports
 import yaml
 import unittest as ut
diff --git a/tests/test_ipu_losses.py b/tests/test_ipu_losses.py
index cb18eee47..b86b5e5b6 100644
--- a/tests/test_ipu_losses.py
+++ b/tests/test_ipu_losses.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 import unittest as ut
 import torch
 from torch.nn import BCELoss, MSELoss, L1Loss, BCEWithLogitsLoss
diff --git a/tests/test_ipu_metrics.py b/tests/test_ipu_metrics.py
index ee4801e7b..c48c980ef 100644
--- a/tests/test_ipu_metrics.py
+++ b/tests/test_ipu_metrics.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 import unittest as ut
 import torch
 from torchmetrics.functional import (
diff --git a/tests/test_ipu_options.py b/tests/test_ipu_options.py
index c3cc9aa3e..3da79da2c 100644
--- a/tests/test_ipu_options.py
+++ b/tests/test_ipu_options.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 import pytest
 from graphium.config._loader import _get_ipu_opts, load_ipu_options
 from graphium.ipu.ipu_utils import ipu_options_list_to_file
diff --git a/tests/test_ipu_poptorch.py b/tests/test_ipu_poptorch.py
index 4f951d504..27385c5ab 100644
--- a/tests/test_ipu_poptorch.py
+++ b/tests/test_ipu_poptorch.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 import pytest
 
 
diff --git a/tests/test_ipu_to_dense_batch.py b/tests/test_ipu_to_dense_batch.py
index 55c6e3372..636c1709c 100644
--- a/tests/test_ipu_to_dense_batch.py
+++ b/tests/test_ipu_to_dense_batch.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 import pytest
 import torch
 from torch_geometric.data import Data, Batch
diff --git a/tests/test_loaders.py b/tests/test_loaders.py
index 22611f32f..91d7cda20 100644
--- a/tests/test_loaders.py
+++ b/tests/test_loaders.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 from graphium.config._loader import merge_dicts
 from copy import deepcopy
 import unittest as ut
diff --git a/tests/test_losses.py b/tests/test_losses.py
index b2f343bf9..bb2e9a868 100644
--- a/tests/test_losses.py
+++ b/tests/test_losses.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Unit tests for the metrics and wrappers of graphium/trainer/metrics/...
 """
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index dc5bc01b2..e6f6f967f 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Unit tests for the metrics and wrappers of graphium/trainer/metrics/...
 """
diff --git a/tests/test_mtl_architecture.py b/tests/test_mtl_architecture.py
index 63da58cdf..b06e4607f 100644
--- a/tests/test_mtl_architecture.py
+++ b/tests/test_mtl_architecture.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Unit tests for the different architectures of graphium/nn/architectures...
 
diff --git a/tests/test_multitask_datamodule.py b/tests/test_multitask_datamodule.py
index b8d2119e1..506054a36 100644
--- a/tests/test_multitask_datamodule.py
+++ b/tests/test_multitask_datamodule.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 import shutil
 import tempfile
 import unittest as ut
diff --git a/tests/test_mup.py b/tests/test_mup.py
index b60e0ccf3..2c054b8f4 100644
--- a/tests/test_mup.py
+++ b/tests/test_mup.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Unit tests for the implementation of mup
 """
diff --git a/tests/test_packing.py b/tests/test_packing.py
index 3b378214b..c38c6ba86 100644
--- a/tests/test_packing.py
+++ b/tests/test_packing.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 # General imports
 import unittest as ut
 import numpy as np
diff --git a/tests/test_pe_nodepair.py b/tests/test_pe_nodepair.py
index f90ce728b..e1275f0b6 100644
--- a/tests/test_pe_nodepair.py
+++ b/tests/test_pe_nodepair.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Unit tests for the positional encodings in graphium/features/*
 """
diff --git a/tests/test_pe_rw.py b/tests/test_pe_rw.py
index 938df28da..bbad88f10 100644
--- a/tests/test_pe_rw.py
+++ b/tests/test_pe_rw.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Unit tests for the positional encodings in graphium/features/*
 """
diff --git a/tests/test_pe_spectral.py b/tests/test_pe_spectral.py
index 400eb9630..6738628eb 100644
--- a/tests/test_pe_spectral.py
+++ b/tests/test_pe_spectral.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Unit tests for the positional encodings in graphium/features/*
 """
diff --git a/tests/test_pos_transfer_funcs.py b/tests/test_pos_transfer_funcs.py
index 5062cbe46..18aaf5faf 100644
--- a/tests/test_pos_transfer_funcs.py
+++ b/tests/test_pos_transfer_funcs.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Unit tests for the positional encodings in graphium/features/*
 """
diff --git a/tests/test_positional_encoders.py b/tests/test_positional_encoders.py
index 166929ba2..7e593200b 100644
--- a/tests/test_positional_encoders.py
+++ b/tests/test_positional_encoders.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Unit tests for the different datasets of graphium/features/featurizer.py
 """
diff --git a/tests/test_positional_encodings.py b/tests/test_positional_encodings.py
index 89bf355a4..6fff51e97 100644
--- a/tests/test_positional_encodings.py
+++ b/tests/test_positional_encodings.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Unit tests for the positional encodings in graphium/features/*
 """
diff --git a/tests/test_predictor.py b/tests/test_predictor.py
index 1ef69775f..69b97357a 100644
--- a/tests/test_predictor.py
+++ b/tests/test_predictor.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Unit tests for the file graphium/trainer/predictor.py
 """
diff --git a/tests/test_pyg_layers.py b/tests/test_pyg_layers.py
index 03498eb35..766ba09db 100644
--- a/tests/test_pyg_layers.py
+++ b/tests/test_pyg_layers.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Unit tests for the different layers of graphium/nn/pyg_layers/...
 
diff --git a/tests/test_residual_connections.py b/tests/test_residual_connections.py
index 1e9387894..3166a7f46 100644
--- a/tests/test_residual_connections.py
+++ b/tests/test_residual_connections.py
@@ -10,7 +10,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Unit tests for the file residual_connections.py
 """
diff --git a/tests/test_training.py b/tests/test_training.py
index 2d98c7cd5..cfe8dc6df 100644
--- a/tests/test_training.py
+++ b/tests/test_training.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 import pytest
 from graphium.cli.train_finetune_test import cli
 import sys
diff --git a/tests/test_utils.py b/tests/test_utils.py
index e059223f9..60b2ff050 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -11,7 +11,6 @@
 
 --------------------------------------------------------------------------------
 """
-
 """
 Unit tests for the metrics and wrappers of graphium/utils/...
 """