From 825da1d530f5ae4b3b9df075bc4304a488e0a2d5 Mon Sep 17 00:00:00 2001
From: Max Parzen
Date: Tue, 13 Jun 2023 15:11:33 +0100
Subject: [PATCH] add codespell linter

---
 .pre-commit-config.yaml                | 16 ++++++++--------
 doc/how_to_contribute.rst              |  2 +-
 doc/index.rst                          |  1 -
 doc/introduction.rst                   |  2 +-
 doc/learning_materials.rst             |  2 +-
 doc/release_notes.rst                  | 12 ++++++++----
 scripts/_helpers.py                    |  4 ++--
 scripts/add_electricity.py             |  2 +-
 scripts/base_network.py                |  2 +-
 scripts/build_bus_regions.py           |  2 +-
 scripts/build_demand_profiles.py       |  2 +-
 scripts/build_osm_network.py           | 10 +++++-----
 scripts/build_powerplants.py           |  4 ++--
 scripts/build_renewable_profiles.py    |  2 +-
 scripts/build_shapes.py                |  2 +-
 scripts/clean_osm_data.py              |  4 ++--
 scripts/non_workflow/zenodo_handler.py |  2 +-
 scripts/retrieve_databundle_light.py   |  2 +-
 scripts/simplify_network.py            |  4 ++--
 19 files changed, 40 insertions(+), 37 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 542a94903..7141f6949 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -26,14 +26,14 @@ repos:
   - id: isort
     args: ["--profile", "black", "--filter-files"]
 
-# # Find common spelling mistakes in comments and docstrings
-# - repo: https://github.com/codespell-project/codespell
-#   rev: v2.2.1
-#   hooks:
-#   - id: codespell
-#     args: ['--ignore-regex="\b[A-Z]+\b"'] # Ignore capital case words, e.g. country codes
-#     types_or: [python, rst, markdown]
-#     files: ^(actions|doc)/
+  # Find common spelling mistakes in comments and docstrings
+- repo: https://github.com/codespell-project/codespell
+  rev: v2.2.4
+  hooks:
+  - id: codespell
+    args: ['--ignore-regex="(\b[A-Z]+\b)"', '--ignore-words-list=fom,appartment,bage,ore,setis,tabacco,berfore'] # Ignore capital case words, e.g. country codes
+    types_or: [python, rst, markdown]
+    files: ^(scripts|doc)/
 
 # Formatting with "black" coding style
 - repo: https://github.com/psf/black
diff --git a/doc/how_to_contribute.rst b/doc/how_to_contribute.rst
index 90afc1490..1553ffc76 100644
--- a/doc/how_to_contribute.rst
+++ b/doc/how_to_contribute.rst
@@ -54,7 +54,7 @@ To contribute a test:
 Performance-profiling
 ---------------------
 Performance profiling is important to understand bottlenecks and
-the accordinly optimize the speed in PyPSA-Earth. We use the Python build-in
+accordingly optimize the speed in PyPSA-Earth. We use the Python built-in
 `cProfile`, custom decorators on single functions and analysis tools like
 `snakeviz `_. See a detailed example
 in `this discussion #557 `_.
diff --git a/doc/index.rst b/doc/index.rst
index fd7def820..0bad0d500 100644
--- a/doc/index.rst
+++ b/doc/index.rst
@@ -188,4 +188,3 @@ Documentation
    learning_materials
    project_structure_and_credits
    talks_and_papers
-
diff --git a/doc/introduction.rst b/doc/introduction.rst
index 6e902c7a0..7b6eb91e1 100644
--- a/doc/introduction.rst
+++ b/doc/introduction.rst
@@ -80,7 +80,7 @@ PyPSA-Earth work is released under multiple licenses:
 * Configuration files are mostly licensed under `CC0-1.0 `_.
 * Data files are licensed under different licenses as noted below.
 
-Invididual files contain license information in the header or in the `dep5 <.reuse/dep5>`_.
+Individual files contain license information in the header or in the `dep5 <.reuse/dep5>`_.
 Additional licenses and urls of the data used in PyPSA-Earth:
 
 .. csv-table::
diff --git a/doc/learning_materials.rst b/doc/learning_materials.rst
index eac4dff67..bcff843f0 100644
--- a/doc/learning_materials.rst
+++ b/doc/learning_materials.rst
@@ -33,7 +33,7 @@ PyPSA Introduction (essential)
 Data science basics (essential)
 --------------------------------
 
-- Fabian Neumann just shared with the world the possibly best training material for `"Data Science fo Energy System Modelling" `_. This is a free multi-week course preparing you for all you need for PyPSA-Earth.
+- Fabian Neumann just shared with the world the possibly best training material for `"Data Science for Energy System Modelling" `_. This is a free multi-week course preparing you for all you need for PyPSA-Earth.
 - Refresh your Python knowledge by watching `CSDojo's playlist `_. His content is excellent as introduction. You will learn in effective short videos the python basics such as variables, if/else statements, functions, lists, for loops, while loops, dictionaries, classes and objects, booleans, list comprehensions, sets - put your hands on and write some test scripts as the video suggests. (~3h)
 - Familiarize yourself with numpy and pandas dataframes. In the Python-based PyPSA tool, we do not work with Excel. Powerful pandas dataframes are our friends. `Here `__ is an extensive 30h course that provides a great introduction if this is completely unfamiliar to you.
 - `Introduction to Unix-shell `_ - "Use of the shell is fundamental to a wide range of advanced computing tasks, including high-performance computing and automated workflow. These lessons will introduce you to this powerful tool." (optional 4h, to become a pro)
diff --git a/doc/release_notes.rst b/doc/release_notes.rst
index 6fd3958a0..95689315c 100644
--- a/doc/release_notes.rst
+++ b/doc/release_notes.rst
@@ -18,11 +18,15 @@ E.g. if a new rule becomes available describe how to use it `snakemake -j1 run_t
 
 * Add merge and replace functionalities when adding custom powerplants `PR #739 `__. "Merge" combines the powerplantmatching data with new custom data. "Replace" allows to use fully self-collected data.
 
-* Add functionality of attaching existing renewable caapcities from custom_powerplants.csv. `PR #744 `__. If custom_powerplants are enabled and custom_powerplants.csv contains wind or solar powerplants, then p_nom and p_nom_min for renewables are extracted from custom_powerplants.csv, aggregated for eacg bus, and set.
+* Add functionality of attaching existing renewable capacities from custom_powerplants.csv. `PR #744 `__. If custom_powerplants are enabled and custom_powerplants.csv contains wind or solar powerplants, then p_nom and p_nom_min for renewables are extracted from custom_powerplants.csv, aggregated for each bus, and set.
 
 * Fix dask parallel computations for e.g. cutouts calculations. Now again more than 1 core will be used when available that can lead to ~8x speed ups with 8 cores `PR #734 `__ and `PR #761 `__.
 
-* Enable the usage of custom rules. Custom rule files must be specified in the config as a list, e.g. custom rules: ["my_rules.smk"]. Empty by default (i.e. no custom rules). `PR #755 `__
+* Add the usage of custom rules. Custom rule files must be specified in the config as a list, e.g. custom rules: ["my_rules.smk"]. Empty by default (i.e. no custom rules). `PR #755 `__
+
+* Add trailing whitespace linter which removes unnecessary tabs when running `pre-commit` `PR #762 `__
+
+* Add codespell linter which corrects word spellings `PR #763 `__
 
 PyPSA-Earth 0.2.1
 =================
@@ -49,7 +53,7 @@ PyPSA-Earth 0.2.0
 
 * Add new config test design. It is now easy and light to test multiple configs `PR #466 `__
 
-* Revision of documenation `PR #471 `__
+* Revision of documentation `PR #471 `__
 
 * Move to new GADM version `PR #478 `__
 
@@ -134,7 +138,7 @@ PyPSA-Earth 0.2.0
 PyPSA-Earth 0.1.0
 =================
 
-Model rebranded from PyPSA-Africa to PyPSA-Earth. Model is part of the now called PyPSA meets Earth initiative which hosts mutliple projects.
+Model rebranded from PyPSA-Africa to PyPSA-Earth. The model is part of the initiative, now called PyPSA meets Earth, which hosts multiple projects.
 
 **New features and major changes (10th September 2022)**
 
diff --git a/scripts/_helpers.py b/scripts/_helpers.py
index 547f76b92..2c7f1ab91 100644
--- a/scripts/_helpers.py
+++ b/scripts/_helpers.py
@@ -45,8 +45,8 @@ def sets_path_to_root(root_directory_name):
             break
         # if repo_name NOT current folder name for 5 levels then stop
         if n == 0:
-            print("Cant find the repo path.")
-        # if repo_name NOT current folder name, go one dir higher
+            print("Can't find the repo path.")
+        # if repo_name NOT current folder name, go one directory higher
         else:
             upper_path = os.path.dirname(os.path.abspath("."))  # name of upper folder
             os.chdir(upper_path)
diff --git a/scripts/add_electricity.py b/scripts/add_electricity.py
index 98b07d736..0e8dd9c9a 100755
--- a/scripts/add_electricity.py
+++ b/scripts/add_electricity.py
@@ -789,7 +789,7 @@ def add_nice_carrier_names(n, config):
     if not (set(renewable_carriers) & set(extendable_carriers["Generator"])):
         logger.warning(
             "No renewables found in config entry `extendable_carriers`. "
-            "In future versions, these have to be explicitely listed. "
+            "In future versions, these have to be explicitly listed. "
             "Falling back to all renewables."
         )
diff --git a/scripts/base_network.py b/scripts/base_network.py
index a8769b95e..0ba198cb5 100644
--- a/scripts/base_network.py
+++ b/scripts/base_network.py
@@ -360,7 +360,7 @@ def _set_countries_and_substations(inputs, config, n):
     # Compares two lists & makes list value true if at least one is true
     buses["substation_off"] = offshore_b | offshore_hvb
 
-    # Busses without country tag are removed OR get a country tag if close to country
+    # Buses without country tag are removed OR get a country tag if close to country
     c_nan_b = buses.country.isnull()
     if c_nan_b.sum() > 0:
         c_tag = get_country(buses.loc[c_nan_b])
diff --git a/scripts/build_bus_regions.py b/scripts/build_bus_regions.py
index be2448d95..14a1b1cab 100644
--- a/scripts/build_bus_regions.py
+++ b/scripts/build_bus_regions.py
@@ -272,7 +272,7 @@ def get_id(coords):
         )
 
     if offshore_regions:
-        # if a offshore_regions exists excute below
+        # if offshore_regions exists, execute below
         pd.concat(offshore_regions, ignore_index=True).to_file(
             snakemake.output.regions_offshore
         )
diff --git a/scripts/build_demand_profiles.py b/scripts/build_demand_profiles.py
index 927539a3e..3357340b5 100644
--- a/scripts/build_demand_profiles.py
+++ b/scripts/build_demand_profiles.py
@@ -35,7 +35,7 @@ Description
 -----------
 
-The rule :mod:`build_demand` creates load demand profiles in correspondance of the buses of the network.
+The rule :mod:`build_demand` creates load demand profiles in correspondence with the buses of the network.
 It creates the load paths for GEGIS outputs by combining the input parameters of the countries, weather year, prediction year, and SSP scenario.
 Then with a function that takes in the PyPSA network "base.nc", region and gadm shape data, the countries of interest, a scale factor, and the snapshots,
 it returns a csv file called "demand_profiles.csv", that allocates the load to the buses of the network according to GDP and population.
diff --git a/scripts/build_osm_network.py b/scripts/build_osm_network.py
index ebcf39889..0c53837dd 100644
--- a/scripts/build_osm_network.py
+++ b/scripts/build_osm_network.py
@@ -190,7 +190,7 @@ def merge_stations_same_station_id(
     # initialize list of cleaned buses
     buses_clean = []
 
-    # initalize the number of buses
+    # initialize the number of buses
     n_buses = 0
 
     for g_name, g_value in buses.groupby(by="station_id"):
@@ -588,7 +588,7 @@ def _split_linestring_by_point(linestring, points):
     Parameters
     ----------
     linestring : LineString
-        Linestring of the line to be splitted
+        Linestring of the line to be split
     points : list
         List of points to split the linestring
 
@@ -622,11 +622,11 @@ def fix_overpassing_lines(lines, buses, distance_crs, tol=1):
         Geodataframe of substations
     tol : float
         Tolerance in meters of the distance between the substation and the line
-        below which the line will be splitted
+        below which the line will be split
     """
 
     lines_to_add = []  # list of lines to be added
-    lines_to_split = []  # list of lines that have been splitted
+    lines_to_split = []  # list of lines that have been split
 
     lines_epsgmod = lines.to_crs(distance_crs)
     buses_epsgmod = buses.to_crs(distance_crs)
@@ -770,7 +770,7 @@ def built_network(inputs, outputs, config, geo_crs, distance_crs, force_ac=False
     bus_country_list = buses["country"].unique().tolist()
 
     # it may happen that bus_country_list contains entries not relevant as a country name (e.g. "not found")
"not found") - # difference can't give negative values; the following will return only releant country names + # difference can't give negative values; the following will return only relevant country names no_data_countries = list(set(country_list).difference(set(bus_country_list))) if len(no_data_countries) > 0: diff --git a/scripts/build_powerplants.py b/scripts/build_powerplants.py index 5d5391a32..7aa263cc7 100644 --- a/scripts/build_powerplants.py +++ b/scripts/build_powerplants.py @@ -142,7 +142,7 @@ def convert_osm_to_pm(filepath_ppl_osm, filepath_ppl_pm): "wave": "Other", "geothermal": "Geothermal", "solar": "Solar", - # "Hard Coal" follows defauls of PPM + # "Hard Coal" follows defaults of PPM "coal": "Hard Coal", "gas": "Natural Gas", "biomass": "Bioenergy", @@ -200,7 +200,7 @@ def convert_osm_to_pm(filepath_ppl_osm, filepath_ppl_pm): ) # All Hydro objects can be interpreted by PPM as Storages, too - # However, everithing extracted from OSM seems to belong + # However, everything extracted from OSM seems to belong # to power plants with "tags.power" == "generator" only osm_ppm_df = pd.DataFrame( data={ diff --git a/scripts/build_renewable_profiles.py b/scripts/build_renewable_profiles.py index 7cb9dc620..3faf8f031 100644 --- a/scripts/build_renewable_profiles.py +++ b/scripts/build_renewable_profiles.py @@ -374,7 +374,7 @@ def rescale_hydro(plants, runoff, normalize_using_yearly, normalization_year): yearlyavg_runoff_by_plant.loc[normalization_buses].groupby("country").sum() ) - # common country indeces + # common country indices common_countries = normalize_using_yearly.columns.intersection( grouped_runoffs.index ) diff --git a/scripts/build_shapes.py b/scripts/build_shapes.py index 6c91ff059..9d3176985 100644 --- a/scripts/build_shapes.py +++ b/scripts/build_shapes.py @@ -167,7 +167,7 @@ def get_GADM_layer( outlogging=False, ): """ - Function to retrive a specific layer id of a geopackage for a selection of countries + Function to retrieve a specific layer id of a geopackage for a selection of countries Parameters ---------- diff --git a/scripts/clean_osm_data.py b/scripts/clean_osm_data.py index df6f428d8..41f70f9ae 100644 --- a/scripts/clean_osm_data.py +++ b/scripts/clean_osm_data.py @@ -126,7 +126,7 @@ def add_line_endings_tosubstations(substations, lines): def set_unique_id(df, col): """ Create unique id's, where id is specified by the column "col" - The steps below create unique bus id's without loosing the original OSM bus_id + The steps below create unique bus id's without losing the original OSM bus_id Unique bus_id are created by simply adding -1,-2,-3 to the original bus_id Every unique id gets a -1 @@ -662,7 +662,7 @@ def integrate_lines_df(df_all_lines, distance_crs): clean_circuits(df) clean_cables(df) - # analyse each row of voltage and requency and match their content + # analyse each row of voltage and frequency and match their content split_and_match_voltage_frequency_size(df) # fill the circuits column for explode diff --git a/scripts/non_workflow/zenodo_handler.py b/scripts/non_workflow/zenodo_handler.py index b308a8773..ab65fd7d7 100644 --- a/scripts/non_workflow/zenodo_handler.py +++ b/scripts/non_workflow/zenodo_handler.py @@ -48,7 +48,7 @@ "access_right": "open", "license": {"id": "cc-by-4.0"}, "keywords": ["Macro Energy Systems", "Power Systems"], -} # more opton visisble at Zenodo REST API https://developers.zenodo.org/#introduction +} # more options visible at Zenodo REST API https://developers.zenodo.org/#introduction ############# diff --git 
index 998baac1f..87fac9e5e 100644
--- a/scripts/retrieve_databundle_light.py
+++ b/scripts/retrieve_databundle_light.py
@@ -496,7 +496,7 @@ def get_best_bundles_by_category(
     # check if non-empty dictionary
     if dict_n_matched:
         # if non-empty, then pick bundles until all countries are selected
-        # or no mor bundles are found
+        # or no more bundles are found
         dict_sort = sorted(dict_n_matched.items(), key=lambda d: d[1])
 
         current_matched_countries = []
diff --git a/scripts/simplify_network.py b/scripts/simplify_network.py
index 7aad36970..27003a8cd 100644
--- a/scripts/simplify_network.py
+++ b/scripts/simplify_network.py
@@ -632,7 +632,7 @@ def drop_isolated_nodes(n, threshold):
     generators_mean_final = n.generators.p_nom.mean()
 
     logger.info(
-        f"Dropped {len(i_to_drop)} buses. A resulted load discrepancy is {(100 * ((load_mean_final - load_mean_origin)/load_mean_origin)):2.1}% and {(100 * ((generators_mean_final - generators_mean_origin)/generators_mean_origin)):2.1}% for average load and generation capacity, respectivelly"
+        f"Dropped {len(i_to_drop)} buses. The resulting discrepancy is {(100 * ((load_mean_final - load_mean_origin)/load_mean_origin)):2.1}% and {(100 * ((generators_mean_final - generators_mean_origin)/generators_mean_origin)):2.1}% for average load and generation capacity, respectively"
     )
 
     return n
@@ -716,7 +716,7 @@ def merge_isolated_nodes(n, threshold, aggregation_strategies=dict()):
     generators_mean_final = n.generators.p_nom.mean()
 
     logger.info(
-        f"Merged {len(i_suffic_load)} buses. Load attached to a single bus with discrepancies of {(100 * ((load_mean_final - load_mean_origin)/load_mean_origin)):2.1E}% and {(100 * ((generators_mean_final - generators_mean_origin)/generators_mean_origin)):2.1E}% for load and generation capacity, respectivelly"
+        f"Merged {len(i_suffic_load)} buses. Load attached to a single bus with discrepancies of {(100 * ((load_mean_final - load_mean_origin)/load_mean_origin)):2.1E}% and {(100 * ((generators_mean_final - generators_mean_origin)/generators_mean_origin)):2.1E}% for load and generation capacity, respectively"
    )
 
    return clustering.network, busmap
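
Usage note (a sketch, not part of the committed patch): with the codespell hook enabled above, the spell check can be run locally before pushing. Assuming `pre-commit` and `codespell` are installed in the active environment, something like the following should work; the direct codespell call mirrors the hook's arguments, though unlike the hook it is not restricted to python/rst/markdown file types:

    # run only the codespell hook over the whole repository
    pre-commit run codespell --all-files

    # or call codespell directly with roughly the same arguments as the hook
    codespell --ignore-regex='(\b[A-Z]+\b)' \
              --ignore-words-list=fom,appartment,bage,ore,setis,tabacco,berfore \
              scripts/ doc/

Words flagged as misspellings that are intentional (e.g. variable names) can be appended to the hook's --ignore-words-list argument in .pre-commit-config.yaml, as done for "fom" and the other entries above.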