Merge pull request #517 from pcagas/test-improvement
Resolving pytest issues
RandomDefaultUser authored Jun 7, 2024
2 parents a1faeba + 032b21b commit e967711
Showing 31 changed files with 176 additions and 157 deletions.
7 changes: 4 additions & 3 deletions .github/workflows/cpu-tests.yml
@@ -1,6 +1,7 @@
 name: CPU tests
 
 on:
+  workflow_dispatch:
   pull_request:
     # Trigger on pull requests to master or develop that are
     # marked as "ready for review" (non-draft PRs)
@@ -181,14 +182,14 @@ jobs:
         # Download test data repository from RODARE. If the version changes
         # this URL has to be adapted (the number after /record/ and the
         # version have to be incremented)
-        wget "https://rodare.hzdr.de/record/2999/files/mala-project/test-data-1.8.0.zip"
+        wget "https://rodare.hzdr.de/record/3004/files/mala-project/test-data-1.8.1.zip"
         # Once downloaded, we have to unzip the file. The name of the root
         # folder in the zip file has to be updated for data repository
         # updates as well - the string at the end is the hash of the data
         # repository commit.
-        unzip -q test-data-1.8.0.zip
-        mv mala-project-test-data-d5694c7 mala_data
+        unzip -q test-data-1.8.1.zip
+        mv mala-project-test-data-741eda6 mala_data
     - name: Test mala
       shell: 'bash -c "docker exec -i mala-cpu bash < {0}"'
12 changes: 5 additions & 7 deletions examples/advanced/ex01_checkpoint_training.py
@@ -3,18 +3,16 @@
 import mala
 from mala import printout
 
-from mala.datahandling.data_repo import data_repo_path
-
-data_path = os.path.join(data_repo_path, "Be2")
+from mala.datahandling.data_repo import data_path
 
 """
-Shows how a training run can be paused and 
+Shows how a training run can be paused and
 resumed. Delete the ex07.zip file prior to execution to see the effect
 of checkpointing.
-Afterwards, execute this script twice to see how MALA progresses from a 
+Afterwards, execute this script twice to see how MALA progresses from a
 checkpoint. As the number of total epochs cannot be divided by the number
-of epochs after which a checkpoint is created without residual, this will 
-lead to MALA performing the missing epochs again. 
+of epochs after which a checkpoint is created without residual, this will
+lead to MALA performing the missing epochs again.
 """
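The residual-epoch remark in this docstring is easy to miss. A quick illustration with hypothetical numbers (not taken from the example):

    # Hypothetical numbers, for illustration only: with a checkpoint every
    # 3 epochs and 10 epochs total, the last checkpoint lands at epoch 9,
    # so a resumed run performs the trailing epoch again.
    total_epochs = 10
    checkpoint_interval = 3
    last_checkpoint = (total_epochs // checkpoint_interval) * checkpoint_interval
    print(last_checkpoint)                 # 9
    print(total_epochs - last_checkpoint)  # 1 epoch is redone after resuming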
8 changes: 3 additions & 5 deletions examples/advanced/ex02_shuffle_data.py
@@ -2,14 +2,12 @@
 
 import mala
 
-from mala.datahandling.data_repo import data_repo_path
-
-data_path = os.path.join(data_repo_path, "Be2")
+from mala.datahandling.data_repo import data_path
 
 """
 Shows how data can be shuffled amongst multiple
-snapshots, which is very useful in the lazy loading case, where this cannot be 
-easily done in memory. 
+snapshots, which is very useful in the lazy loading case, where this cannot be
+easily done in memory.
 """
7 changes: 2 additions & 5 deletions examples/advanced/ex03_tensor_board.py
@@ -3,13 +3,10 @@
 import mala
 from mala import printout
 
-from mala.datahandling.data_repo import data_repo_path
-
-data_path = os.path.join(data_repo_path, "Be2")
-
+from mala.datahandling.data_repo import data_path
 
 """
-Shows how a NN training by MALA can be visualized using 
+Shows how a NN training by MALA can be visualized using
 tensorboard. The training is a basic MALA network training.
 """
6 changes: 2 additions & 4 deletions examples/advanced/ex04_acsd.py
@@ -1,13 +1,11 @@
 import os
 
 import mala
-from mala.datahandling.data_repo import data_repo_path
-
-data_path = os.path.join(data_repo_path, "Be2")
+from mala.datahandling.data_repo import data_path
 
 """
 Shows how MALA can be used to optimize descriptor
-parameters based on the ACSD analysis (see hyperparameter paper in the 
+parameters based on the ACSD analysis (see hyperparameter paper in the
 documentation for mathematical details).
 """
10 changes: 4 additions & 6 deletions examples/advanced/ex05_checkpoint_hyperparameter_optimization.py
@@ -2,16 +2,14 @@
 
 import mala
 
-from mala.datahandling.data_repo import data_repo_path
-
-data_path = os.path.join(data_repo_path, "Be2")
+from mala.datahandling.data_repo import data_path
 
 """
-Shows how a hyperparameter optimization run can 
+Shows how a hyperparameter optimization run can
 be paused and resumed. Delete all ex04_*.pkl and ex04_*.pth prior to execution.
-Afterwards, execute this script twice to see how MALA progresses from a 
+Afterwards, execute this script twice to see how MALA progresses from a
 checkpoint. As the number of trials cannot be divided by the number
-of epochs after which a checkpoint is created without residual, this will 
+of epochs after which a checkpoint is created without residual, this will
 lead to MALA performing the missing trials again.
 """
@@ -2,14 +2,12 @@
 
 import mala
 
-from mala.datahandling.data_repo import data_repo_path
-
-data_path = os.path.join(data_repo_path, "Be2")
+from mala.datahandling.data_repo import data_path
 
 """
-ex09_distributed_hyperopt.py: Shows how a hyperparameter 
+ex09_distributed_hyperopt.py: Shows how a hyperparameter
 optimization can be sped up using a RDB storage. Ideally this should be done
-using a database server system, such as PostgreSQL or MySQL. 
+using a database server system, such as PostgreSQL or MySQL.
 For this easy example, sqlite will be used. It is highly advisory not to
 to use this for actual, at-scale calculations!
@@ -3,12 +3,10 @@
 import mala
 from mala import printout
 
-from mala.datahandling.data_repo import data_repo_path
-
-data_path = os.path.join(data_repo_path, "Be2")
+from mala.datahandling.data_repo import data_path
 
 """
-Shows how recent developments in hyperparameter optimization techniques can be 
+Shows how recent developments in hyperparameter optimization techniques can be
 used (OAT / training-free NAS).
 REQUIRES OAPACKAGE.
13 changes: 5 additions & 8 deletions examples/advanced/ex08_visualize_observables.py
@@ -2,18 +2,15 @@
 
 import mala
 
-from mala.datahandling.data_repo import data_repo_path
+from mala.datahandling.data_repo import data_path
 
-atoms_path = os.path.join(
-    os.path.join(data_repo_path, "Be2"), "Be_snapshot1.out"
-)
-ldos_path = os.path.join(
-    os.path.join(data_repo_path, "Be2"), "Be_snapshot1.out.npy"
-)
 """
-Shows how MALA can be used to visualize observables of interest. 
+Shows how MALA can be used to visualize observables of interest.
 """
 
+atoms_path = os.path.join(data_path, "Be_snapshot1.out")
+ldos_path = os.path.join(data_path, "Be_snapshot1.out.npy")
 
 ####################
 # 1. READ ELECTRONIC STRUCTURE DATA
 # This data may be read as part of an ML-DFT model inference.
7 changes: 2 additions & 5 deletions examples/basic/ex01_train_network.py
@@ -2,17 +2,14 @@
 
 import mala
 
-from mala.datahandling.data_repo import data_repo_path
-
-data_path = os.path.join(data_repo_path, "Be2")
+from mala.datahandling.data_repo import data_path
 
 """
 This example shows how a neural network can be trained on material
 data using this framework. It uses preprocessed data, that is read in
 from *.npy files.
 """
 
-
 ####################
 # 1. PARAMETERS
 # The first step of each MALA workflow is to define a parameters object and
@@ -93,5 +90,5 @@
 test_trainer.train_network()
 additional_calculation_data = os.path.join(data_path, "Be_snapshot0.out")
 test_trainer.save_run(
-    "be_model", additional_calculation_data=additional_calculation_data
+    "Be_model", additional_calculation_data=additional_calculation_data
 )
11 changes: 6 additions & 5 deletions examples/basic/ex02_test_network.py
@@ -3,17 +3,16 @@
 import mala
 from mala import printout
 
-from mala.datahandling.data_repo import data_repo_path
-
-data_path = os.path.join(data_repo_path, "Be2")
+from mala.datahandling.data_repo import data_path
 
 """
 This example shows how a trained network can be tested
 with additional test snapshots. Either execute ex01 before executing this one
 or download the appropriate model from the provided test data repo.
 """
-assert os.path.exists("be_model.zip"), "Be model missing, run ex01 first."
 
+model_name = "Be_model"
+model_path = "./" if os.path.exists("Be_model.zip") else data_path
 
 ####################
 # 1. LOADING A NETWORK
@@ -27,7 +26,9 @@
 # (output_format="list") or as an averaged value (output_format="mae")
 ####################
 
-parameters, network, data_handler, tester = mala.Tester.load_run("be_model")
+parameters, network, data_handler, tester = mala.Tester.load_run(
+    run_name=model_name, path=model_path
+)
 tester.observables_to_test = ["band_energy", "number_of_electrons"]
 tester.output_format = "list"
 parameters.data.use_lazy_loading = True
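The pattern introduced here replaces the hard assert on a locally trained model with a fallback to the pretrained model in the test data repository; ex05 and ex06 below reuse the same logic. A minimal standalone sketch of that pattern, assuming the repository ships a pretrained Be_model.zip next to the Be2 snapshots:

    import os

    import mala
    from mala.datahandling.data_repo import data_path

    # Prefer a model trained locally by ex01; otherwise fall back to the
    # pretrained model assumed to ship with the test data repository.
    model_name = "Be_model"
    model_path = "./" if os.path.exists("Be_model.zip") else data_path

    # Saved runs are addressed by run name plus a search path, exactly as
    # in the diff above.
    parameters, network, data_handler, tester = mala.Tester.load_run(
        run_name=model_name, path=model_path
    )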
6 changes: 2 additions & 4 deletions examples/basic/ex03_preprocess_data.py
@@ -2,13 +2,11 @@
 
 import mala
 
-from mala.datahandling.data_repo import data_repo_path
-
-data_path = os.path.join(data_repo_path, "Be2")
+from mala.datahandling.data_repo import data_path
 
 """
 Shows how this framework can be used to preprocess
-data. Preprocessing here means converting raw DFT calculation output into 
+data. Preprocessing here means converting raw DFT calculation output into
 numpy arrays of the correct size. For the input data, this means descriptor
 calculation.
6 changes: 2 additions & 4 deletions examples/basic/ex04_hyperparameter_optimization.py
@@ -2,14 +2,12 @@
 
 import mala
 
-from mala.datahandling.data_repo import data_repo_path
-
-data_path = os.path.join(data_repo_path, "Be2")
+from mala.datahandling.data_repo import data_path
 
 """
 Shows how a hyperparameter optimization can be done using this framework. There
 are multiple hyperparameter optimizers available in this framework. This example
-focusses on the most universal one - optuna. 
+focusses on the most universal one - optuna.
 """
16 changes: 8 additions & 8 deletions examples/basic/ex05_run_predictions.py
@@ -4,27 +4,27 @@
 import mala
 from mala import printout
 
-from mala.datahandling.data_repo import data_repo_path
-
-data_path = os.path.join(data_repo_path, "Be2")
-
-assert os.path.exists("be_model.zip"), "Be model missing, run ex01 first."
+from mala.datahandling.data_repo import data_path
 
 """
-Show how a prediction can be made using MALA, based on only a
-trained network and atomic configurations.
+Show how a prediction can be made using MALA, based on only a trained network and atomic
+configurations. Either execute ex01 before executing this one or download the
+appropriate model from the provided test data repo.
 REQUIRES LAMMPS (and potentiall the total energy module).
 """
 
+model_name = "Be_model"
+model_path = "./" if os.path.exists("Be_model.zip") else data_path
 
 
 ####################
 # 1. LOADING A NETWORK
 # To use the predictor class to test an ML-DFT model, simply load it via the
 # Tester class interface. Afterwards, set the necessary parameters.
 ####################
 parameters, network, data_handler, predictor = mala.Predictor.load_run(
-    "be_model"
+    run_name=model_name, path=model_path
 )
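The rest of this example is truncated here. A hedged sketch of a plausible continuation — predict_for_atoms is an assumption about the Predictor API, not something this diff shows:

    from ase.io import read

    # Assumed continuation (not part of this diff): read an atomic
    # configuration from the test data and predict its LDOS with the
    # loaded model.
    atoms = read(os.path.join(data_path, "Be_snapshot1.out"))
    ldos = predictor.predict_for_atoms(atoms)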
18 changes: 9 additions & 9 deletions examples/basic/ex06_ase_calculator.py
@@ -1,29 +1,29 @@
 import os
 
-import mala
 from ase.io import read
+import mala
 
-from mala.datahandling.data_repo import data_repo_path
-
-data_path = os.path.join(data_repo_path, "Be2")
-
-assert os.path.exists("be_model.zip"), "Be model missing, run ex01 first."
+from mala.datahandling.data_repo import data_path
 
 """
-Shows how MALA can be used as an ASE calculator. 
-Currently, calculation of forces is not supported.
+Shows how MALA can be used as an ASE calculator.
+Currently, calculation of forces is not supported. Either execute ex01 before executing
+this one or download the appropriate model from the provided test data repo.
 REQUIRES LAMMPS AND QUANTUM ESPRESSO (TOTAL ENERGY MODULE).
 """
 
+model_name = "Be_model"
+model_path = "./" if os.path.exists("Be_model.zip") else data_path
 
 
 ####################
 # 1. LOADING A NETWORK
 # To use the ASE calculator, just load a MALA ML-DFT model.
 # Further make sure to set the path to the pseudopotential used during
 # data generation-
 ####################
-calculator = mala.MALA.load_model("be_model")
+calculator = mala.MALA.load_model(run_name=model_name, path=model_path)
 calculator.mala_parameters.targets.pseudopotential_path = data_path
 
 ####################
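The truncated remainder of ex06 would use this calculator; attaching it to an ASE Atoms object is standard ASE usage, sketched here as an assumption rather than quoted from the diff:

    # Assumed usage: attach the MALA calculator to an ASE Atoms object and
    # request an energy, as with any ASE calculator.
    atoms = read(os.path.join(data_path, "Be_snapshot1.out"))
    atoms.calc = calculator
    print(atoms.get_potential_energy())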
2 changes: 2 additions & 0 deletions mala/datahandling/data_repo.py
@@ -14,9 +14,11 @@
 name = "MALA_DATA_REPO"
 if name in os.environ:
     data_repo_path = os.environ[name]
+    data_path = os.path.join(data_repo_path, "Be2")
 else:
     parallel_warn(
         f"Environment variable {name} not set. You won't be able "
         "to run all examples and tests."
     )
     data_repo_path = None
+    data_path = None
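This small change is what the rest of the commit builds on: data_repo.py now resolves the Be2 directory once and exports it as data_path, so every example and test replaces three lines of boilerplate with a single import. From a consumer's perspective:

    import os

    # Before this commit, every script did the join itself:
    #   from mala.datahandling.data_repo import data_repo_path
    #   data_path = os.path.join(data_repo_path, "Be2")
    # After this commit, one import suffices (data_path is None when the
    # MALA_DATA_REPO environment variable is unset):
    from mala.datahandling.data_repo import data_path

    snapshot_file = os.path.join(data_path, "Be_snapshot0.out")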
4 changes: 1 addition & 3 deletions test/all_lazy_loading_test.py
@@ -7,9 +7,7 @@
 import torch
 import pytest
 
-from mala.datahandling.data_repo import data_repo_path
-
-data_path = os.path.join(data_repo_path, "Be2")
+from mala.datahandling.data_repo import data_path
 
 # This test compares the data scaling using the regular scaling procedure and
 # the lazy-loading one (incremental fitting).
8 changes: 3 additions & 5 deletions test/basic_gpu_test.py
@@ -6,9 +6,9 @@
 which MALA relies on). Two things are tested:
 1. Whether or not your system has GPU support.
-2. Whether or not the GPU does what it is supposed to. For this, 
+2. Whether or not the GPU does what it is supposed to. For this,
    a training is performed. It is measured whether or not the utilization
-   of the GPU results in a speed up. 
+   of the GPU results in a speed up.
 """
 import os
 import time
@@ -19,9 +19,7 @@
 import pytest
 import torch
 
-from mala.datahandling.data_repo import data_repo_path
-
-data_path = os.path.join(data_repo_path, "Be2")
+from mala.datahandling.data_repo import data_path
 
 test_checkpoint_name = "test"