refactor: Split module 'main' into separate modules (#10)

Closes #8
hsbc · Sep 22, 2022 · 7c11c1f · 7c11c1f
1 parent d6095f5
commit 7c11c1f
Show file tree

Hide file tree

Showing 10 changed files with 1,611 additions and 1,460 deletions.
diff --git a/src/pyratings/__init__.py b/src/pyratings/__init__.py
@@ -1,31 +1,46 @@
-"""
-Copyright 2022 HSBC Global Asset Management (Deutschland) GmbH
+# Copyright 2022 HSBC Global Asset Management (Deutschland) GmbH
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#        https://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
 
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-"""
-from pyratings.main import (
-    get_pure_ratings,
+from pyratings.aggregate import get_weighted_average
+from pyratings.clean import get_pure_ratings
+from pyratings.consolidate import (
     get_best_ratings,
     get_second_best_ratings,
     get_worst_ratings,
-    get_scores_from_ratings,
-    get_scores_from_warf,
-    get_ratings_from_scores,
-    get_ratings_from_warf,
-    get_warf_from_scores,
-    get_warf_from_ratings,
-    get_weighted_average,
-    get_warf_buffer,
+)
+from pyratings.get_ratings import get_ratings_from_scores, get_ratings_from_warf
+from pyratings.get_scores import get_scores_from_ratings, get_scores_from_warf
+from pyratings.get_warf import get_warf_from_ratings, get_warf_from_scores
+from pyratings.utils import (
     _assert_rating_provider,
     _extract_rating_provider,
+    _get_translation_dict,
 )
+from pyratings.warf import get_warf_buffer
+
+# define public functions
+__all__ = [
+    "get_best_ratings",
+    "get_pure_ratings",
+    "get_ratings_from_scores",
+    "get_ratings_from_warf",
+    "get_scores_from_ratings",
+    "get_scores_from_warf",
+    "get_second_best_ratings",
+    "get_warf_buffer",
+    "get_warf_from_ratings",
+    "get_warf_from_scores",
+    "get_weighted_average",
+    "get_worst_ratings",
+]
diff --git a/src/pyratings/aggregate.py b/src/pyratings/aggregate.py
@@ -0,0 +1,66 @@
+# Copyright 2022 HSBC Global Asset Management (Deutschland) GmbH
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#        https://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+
+import pandas as pd
+
+
+def get_weighted_average(data: pd.Series, weights: pd.Series) -> float:
+    """
+    Compute the weighted average of `data`.
+
+    Parameters
+    ----------
+    data
+        Numerical values; may contain ``nan``.
+    weights
+        Weights (between 0 and 1) of the rows in `data`, matched by index label.
+
+    Returns
+    -------
+    float
+        Sumproduct of `data` and the rescaled `weights`.
+
+    Notes
+    -----
+    Rows of `data` holding ``nan`` do not contribute to the result. Their weights
+    are subtracted from 1 and all weights are divided by the remaining share, so
+    that the weights of the ``non-nan`` rows add up to 1 (100%) as long as the
+    original weights did.
+
+    Examples
+    --------
+    >>> import numpy as np
+    >>> import pandas as pd
+
+    >>> rtg_scores = pd.Series(data=[5, 7, 9])
+    >>> wgt = pd.Series(data=[0.5, 0.3, 0.2])
+    >>> round(float(get_weighted_average(data=rtg_scores, weights=wgt)), 2)
+    6.4
+
+    >>> warf = pd.Series(data=[500, 735, np.nan, 93, np.nan])
+    >>> wgt = pd.Series(data=[0.4, 0.1, 0.1, 0.2, 0.2])
+    >>> round(float(get_weighted_average(data=warf, weights=wgt)), 2)
+    417.29
+    """
+    # labels of the rows in `data` that hold no value
+    nan_labels = data.loc[data.isna()].index
+
+    # share of the total weight (taken to be 1) held by rows with a value
+    valid_share = 1 - sum(weights.loc[nan_labels])
+
+    # rescale the weights by that share; nan rows are zeroed so they add nothing
+    rescaled = weights / valid_share
+    filled = data.fillna(0)
+
+    return filled.dot(rescaled)
diff --git a/src/pyratings/clean.py b/src/pyratings/clean.py
@@ -0,0 +1,130 @@
+# Copyright 2022 HSBC Global Asset Management (Deutschland) GmbH
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#        https://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+
+from typing import Union
+
+import pandas as pd
+
+
+def get_pure_ratings(
+    ratings: Union[str, pd.Series, pd.DataFrame]
+) -> Union[str, pd.Series, pd.DataFrame]:
+    """Remove rating watches/outlooks.
+
+    Parameters
+    ----------
+    ratings
+        Rating may contain watch, such as `AA- *+`, `BBB+ (CwNegative)`.
+        Outlook/watch should be separated by a blank from the actual rating.
+
+    Returns
+    -------
+    Union[str, pd.Series, pd.DataFrame]
+        String, Series, or DataFrame with regular ratings stripped of watches.
+        The name of the resulting Series or the columns of the returning DataFrame will
+        be suffixed with `_clean`.
+        A `pd.Series` or `pd.DataFrame` passed in is left unchanged; the cleaned
+        ratings are returned in a new object.
+
+    Examples
+    --------
+    Cleaning a single rating:
+
+    >>> get_pure_ratings("AA- *+")
+    'AA-'
+
+    >>> get_pure_ratings("Au")
+    'A'
+
+    Cleaning a `pd.Series`:
+
+    >>> import numpy as np
+    >>> import pandas as pd
+
+    >>> rating_series=pd.Series(
+    ...    data=[
+    ...        "BB+ *-",
+    ...        "BBB *+",
+    ...        np.nan,
+    ...        "AA- (Developing)",
+    ...        np.nan,
+    ...        "CCC+ (CwPositive)",
+    ...        "BB+u",
+    ...    ],
+    ...    name="rtg_SP",
+    ... )
+    >>> get_pure_ratings(rating_series)
+    0     BB+
+    1     BBB
+    2     NaN
+    3     AA-
+    4     NaN
+    5    CCC+
+    6     BB+
+    Name: rtg_SP_clean, dtype: object
+
+    Cleaning a `pd.DataFrame`:
+
+    >>> rtg_df = pd.DataFrame(
+    ...    data={
+    ...        "rtg_SP": [
+    ...            "BB+ *-",
+    ...            "BBB *+",
+    ...            np.nan,
+    ...            "AA- (Developing)",
+    ...            np.nan,
+    ...            "CCC+ (CwPositive)",
+    ...            "BB+u",
+    ...        ],
+    ...        "rtg_Fitch": [
+    ...            "BB+ *-",
+    ...            "BBB *+",
+    ...            pd.NA,
+    ...            "AA- (Developing)",
+    ...            np.nan,
+    ...            "CCC+ (CwPositive)",
+    ...            "BB+u",
+    ...        ],
+    ...    },
+    ... )
+    >>> get_pure_ratings(rtg_df)
+      rtg_SP_clean rtg_Fitch_clean
+    0          BB+             BB+
+    1          BBB             BBB
+    2          NaN            <NA>
+    3          AA-             AA-
+    4          NaN             NaN
+    5         CCC+            CCC+
+    6          BB+             BB+
+    """
+    if isinstance(ratings, str):
+        # keep the part before the first blank, then drop a trailing 'u'/'U'
+        return ratings.split()[0].rstrip("uU")
+
+    elif isinstance(ratings, pd.Series):
+        # work on a copy so that the caller's Series keeps its values and name
+        cleaned = ratings.copy()
+        isstring = cleaned.apply(type).eq(str)
+        # `.str` rejects non-string dtypes: skip it if there is no string at all
+        if isstring.any():
+            cleaned[isstring] = cleaned[isstring].str.split().str[0]
+            cleaned[isstring] = cleaned[isstring].str.rstrip("uU")
+        cleaned.name = f"{ratings.name}_clean"
+        return cleaned
+
+    elif isinstance(ratings, pd.DataFrame):
+        # clean each column through the Series branch, then reassemble them
+        return pd.concat(
+            [get_pure_ratings(ratings=ratings[col]) for col in ratings.columns], axis=1
+        )