diff --git a/.codespell-ignore.txt b/.codespell-ignore.txt
new file mode 100644
index 00000000..f1df0491
--- /dev/null
+++ b/.codespell-ignore.txt
@@ -0,0 +1,18 @@
+fo
+te
+esy
+lsat
+fof
+bufer
+gud
+te
+nd
+teot
+noo
+nd
+tebu
+3st
+bu
+dasy
+nd
+ot
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index fb4e0958..2a66acd7 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -15,7 +15,7 @@ default_stages: [commit]
 
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v4.5.0
     hooks:
       - id: trailing-whitespace
       - id: end-of-file-fixer
@@ -26,58 +26,68 @@ repos:
       - id: check-yaml
 
   - repo: https://github.com/psf/black
-    rev: 22.3.0
+    rev: 23.12.0
     hooks:
       - id: black
        exclude_types: [image]
 
   - repo: https://github.com/asottile/pyupgrade
-    rev: v2.10.0
+    rev: v3.15.0
     hooks:
       - id: pyupgrade
 
   - repo: https://github.com/timothycrosley/isort
-    rev: 5.12.0
+    rev: 5.13.2
     hooks:
       - id: isort
 
-  - repo: https://gitlab.com/pycqa/flake8
-    rev: 3.8.2
+  - repo: https://github.com/pycqa/flake8
+    rev: 6.1.0
     hooks:
       - id: flake8
         additional_dependencies: [flake8-isort]
 
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.941
+    rev: v1.7.1
    hooks:
       - id: mypy
         exclude: ^docs/
         args: [--config-file=mypy.ini]
 
   - repo: https://github.com/compilerla/conventional-pre-commit
-    rev: v1.2.0
+    rev: v3.0.0
     hooks:
       - id: conventional-pre-commit
         stages: [commit-msg]
 
   - repo: https://github.com/asottile/blacken-docs
-    rev: v1.8.0
+    rev: 1.16.0
     hooks:
       - id: blacken-docs
-        additional_dependencies: [black==20.8b1]
+        additional_dependencies: [black==22.3.0]
 
   - repo: https://github.com/pycqa/pydocstyle
-    rev: 6.1.1
+    rev: 6.3.0
     hooks:
       - id: pydocstyle
         name: Checking docstring style.
         args: ["--convention=google","--add-ignore=D100,D202,D101,D415"]
 
   - repo: https://github.com/econchick/interrogate
-    rev: 1.4.0
+    rev: 1.5.0
     hooks:
       - id: interrogate
         name: Checking docstring code coverage.
         args: ["--config=pyproject.toml","marl_eval"]
         pass_filenames: false
+
+  - repo: https://github.com/codespell-project/codespell
+    rev: v2.2.6
+    hooks:
+      - id: codespell
+        name: codespell
+        description: Checks for common misspellings in text files.
+        entry: codespell --ignore-words=.codespell-ignore.txt
+        language: python
+        types: [text]
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index e380e962..f93bc469 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -3,7 +3,7 @@
 We'd love to accept your patches and contributions to this project as it will take the joint efforts of the MARL community to ensure that the evaluation standard is raised! There are just a few small guidelines you need to follow.
 
-## Installing MARL-eval for developement
+## Installing MARL-eval for development
 
 To develop features for marl-eval, clone the repository and install all the dependencies as follows:
diff --git a/bash_scripts/tests.sh b/bash_scripts/tests.sh
index c60d5aca..b8eacbf8 100755
--- a/bash_scripts/tests.sh
+++ b/bash_scripts/tests.sh
@@ -34,7 +34,7 @@ pip install virtualenv
 virtualenv marl_eval_testing
 source marl_eval_testing/bin/activate
 
-# Install depedencies
+# Install dependencies
 pip install .[dev]
 
 # Run tests
diff --git a/examples/quickstart.ipynb b/examples/quickstart.ipynb
index d4a2fde4..e3992a68 100644
--- a/examples/quickstart.ipynb
+++ b/examples/quickstart.ipynb
@@ -83,7 +83,7 @@
     "# Choose the list of metrics to normalize\n",
     "METRICS_TO_NORMALIZE = [\"return\"]\n",
     "\n",
-    "# Call data_process_pipeline to normalize the choosen metrics and to clean the data\n",
+    "# Call data_process_pipeline to normalize the chosen metrics and to clean the data\n",
     "processed_data = data_process_pipeline(\n",
     "    raw_data=raw_data, metrics_to_normalize=METRICS_TO_NORMALIZE\n",
     ")"
diff --git a/examples/simple_example.py b/examples/simple_example.py
index 7c46a97a..99ae58df 100644
--- a/examples/simple_example.py
+++ b/examples/simple_example.py
@@ -33,7 +33,7 @@
 ##############################
 METRICS_TO_NORMALIZE = ["return"]
 
-with open("examples/example_results.json", "r") as f:
+with open("examples/example_results.json") as f:
     raw_data = json.load(f)
 
 processed_data = data_process_pipeline(
diff --git a/marl_eval/utils/data_processing_utils.py b/marl_eval/utils/data_processing_utils.py
index 39672358..432b2f70 100644
--- a/marl_eval/utils/data_processing_utils.py
+++ b/marl_eval/utils/data_processing_utils.py
@@ -130,7 +130,6 @@ def get_and_aggregate_data_single_task(
     for step in steps:
         # Loop over each algorithm
         for algorithm in algorithms:
-
             # Get the data for the given algorithm
             algorithm_data = task_data[algorithm]
             # Compute the mean and 95% CI for the given algorithm over all seeds
@@ -482,7 +481,6 @@ def _select_metrics_for_plotting(absolute_metrics: list) -> list:
     for metric in mean_absolute_metrics:
         final_metric_tensor_dictionary[metric] = {}
         for algorithm in algorithms:
-
             final_metric_tensor_dictionary[metric][algorithm] = np.stack(
                 master_metric_dictionary[metric][algorithm], axis=2
             )
diff --git a/marl_eval/utils/diagnose_data_errors.py b/marl_eval/utils/diagnose_data_errors.py
index b26148ec..7f03fd6f 100644
--- a/marl_eval/utils/diagnose_data_errors.py
+++ b/marl_eval/utils/diagnose_data_errors.py
@@ -102,10 +102,8 @@ def check_runs(self, num_runs: List) -> tuple:
             return True, num_runs[0]
 
         print(
-            "The number of runs is not identical through the different algorithms and scenarios.\n\
-                The minimum number of runs is "
-            + str(min(num_runs))
-            + " runs."
+            "The number of runs is not identical through the different algorithms and "
+            "scenarios.\nThe minimum number of runs is " + str(min(num_runs)) + " runs."
         )
         return False, min(num_runs)
 
@@ -133,7 +131,6 @@ def data_format(self) -> Dict[str, Any]:  # noqa: C901
         data_used: Dict[str, Any] = {}
 
         for env in self.raw_data.keys():
-
             # List of algorithms used in the experiment across the tasks
             algorithms_used = []
             # List of num or runs used across the algos and the tasks
@@ -144,12 +141,10 @@ def data_format(self) -> Dict[str, Any]:  # noqa: C901
             metrics_used = []
 
             for task in self.raw_data[env].keys():
-
                 # Append the list of used algorithms across the tasks
                 algorithms_used.append(sorted(list(processed_data[env][task].keys())))
 
                 for algorithm in self.raw_data[env][task].keys():
-
                     # Append the number of runs used across the different algos
                     runs_used.append(len(processed_data[env][task][algorithm].keys()))
 
diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt
index 4f07ad88..bf1bbcd2 100644
--- a/requirements/requirements-dev.txt
+++ b/requirements/requirements-dev.txt
@@ -9,3 +9,4 @@ toml
 pytest==6.2.4
 pytest-xdist
 jax[cpu]
+codespell==2.2.6
diff --git a/tests/data_processing_utils_test.py b/tests/data_processing_utils_test.py
index 082ba598..bb89df84 100644
--- a/tests/data_processing_utils_test.py
+++ b/tests/data_processing_utils_test.py
@@ -44,7 +44,7 @@
 @pytest.fixture
 def raw_data() -> Dict[str, Dict[str, Any]]:
     """Fixture for raw experiment data."""
-    with open("tests/mock_data_test.json", "r") as f:
+    with open("tests/mock_data_test.json") as f:
         read_in_data = json.load(f)
     return read_in_data
 
@@ -73,7 +73,7 @@ def test_matrices_for_rliable_full_environment_dataset(
 ) -> None:
     """Tests that arrays for rliable are created correctly for \
     a full dataset containing multiple algorithms and tasks \
-    for a given envionment."""
+    for a given environment."""
     processed_data = data_process_pipeline(
         raw_data=raw_data, metrics_to_normalize=["return"]
diff --git a/tests/diagnose_data_errors_test.py b/tests/diagnose_data_errors_test.py
index 361bda07..dcc9a614 100644
--- a/tests/diagnose_data_errors_test.py
+++ b/tests/diagnose_data_errors_test.py
@@ -26,7 +26,7 @@
 @pytest.fixture
 def valid_raw_data() -> Dict[str, Dict[str, Any]]:
     """Fixture for raw experiment data."""
-    with open("tests/mock_data_test.json", "r") as f:
+    with open("tests/mock_data_test.json") as f:
         read_in_data = json.load(f)
     return read_in_data
 
@@ -35,7 +35,7 @@ def valid_raw_data() -> Dict[str, Dict[str, Any]]:
 @pytest.fixture
 def invalid_algo_raw_data() -> Dict[str, Dict[str, Any]]:
     """Fixture for raw experiment data."""
-    with open("tests/mock_data_test.json", "r") as f:
+    with open("tests/mock_data_test.json") as f:
         read_in_data = json.load(f)
 
     del read_in_data["env_1"]["task_1"]["algo_1"]
@@ -45,7 +45,7 @@ def invalid_algo_raw_data() -> Dict[str, Dict[str, Any]]:
 @pytest.fixture
 def invalid_metrics_raw_data() -> Dict[str, Dict[str, Any]]:
     """Fixture for raw experiment data."""
-    with open("tests/mock_data_test.json", "r") as f:
+    with open("tests/mock_data_test.json") as f:
         read_in_data = json.load(f)
 
     del read_in_data["env_1"]["task_1"]["algo_1"]["43289"]["STEP_1"]["return"]
@@ -55,7 +55,7 @@ def invalid_metrics_raw_data() -> Dict[str, Dict[str, Any]]:
 @pytest.fixture
 def invalid_runs_raw_data() -> Dict[str, Dict[str, Any]]:
     """Fixture for raw experiment data."""
-    with open("tests/mock_data_test.json", "r") as f:
+    with open("tests/mock_data_test.json") as f:
         read_in_data = json.load(f)
 
     del read_in_data["env_1"]["task_2"]["algo_1"]["43289"]
@@ -65,7 +65,7 @@ def invalid_runs_raw_data() -> Dict[str, Dict[str, Any]]:
 @pytest.fixture
 def invalid_steps_raw_data() -> Dict[str, Dict[str, Any]]:
     """Fixture for raw experiment data."""
-    with open("tests/mock_data_test.json", "r") as f:
+    with open("tests/mock_data_test.json") as f:
         read_in_data = json.load(f)
 
     del read_in_data["env_1"]["task_1"]["algo_1"]["42"]["STEP_1"]
@@ -75,7 +75,7 @@ def invalid_steps_raw_data() -> Dict[str, Dict[str, Any]]:
 @pytest.fixture
 def invalid_raw_data() -> Dict[str, Dict[str, Any]]:
     """Fixture for raw experiment data."""
-    with open("tests/mock_data_test.json", "r") as f:
+    with open("tests/mock_data_test.json") as f:
         read_in_data = json.load(f)
 
     del read_in_data["env_1"]["task_2"]["algo_1"]
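
A minimal sketch of how the new codespell hook can be exercised locally once this patch is applied, assuming pre-commit and the updated dev requirements are installed; the hook id and the --ignore-words entry mirror the .pre-commit-config.yaml change above:

    # Run only the codespell hook across the whole repository via pre-commit
    pre-commit run codespell --all-files

    # Or invoke codespell directly with the same ignore list the hook uses
    codespell --ignore-words=.codespell-ignore.txt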