From 64a41f66bb19db30a254bb6af376e8e0a1f9c8e0 Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Thu, 16 Feb 2023 16:33:22 +0100 Subject: [PATCH 01/12] initial attempt at a fast init --- pyam/core.py | 9 +++--- pyam/utils.py | 58 ++++++++++++++++++++++++++++++++++++++- tests/profile_init.py | 64 +++++++++++++++++++++++++++++++++++++++++++ tests/test_core.py | 4 +++ 4 files changed, 130 insertions(+), 5 deletions(-) create mode 100644 tests/profile_init.py diff --git a/pyam/core.py b/pyam/core.py index 5bd6d0a25..d46ae89ee 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -120,7 +120,7 @@ class IamDataFrame(object): for those who are not used to the pandas/Python universe. """ - def __init__(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs): + def __init__(self, data, meta=None, index=DEFAULT_META_INDEX, fast=False, **kwargs): """Initialize an instance of an IamDataFrame""" if isinstance(data, IamDataFrame): if kwargs: @@ -133,9 +133,9 @@ def __init__(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs): for attr, value in data.__dict__.items(): setattr(self, attr, value) else: - self._init(data, meta, index=index, **kwargs) + self._init(data, meta, index=index, fast=fast, **kwargs) - def _init(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs): + def _init(self, data, meta=None, index=DEFAULT_META_INDEX, fast=False, **kwargs): """Process data and set attributes for new instance""" # pop kwarg for meta_sheet_name (prior to reading data from file) @@ -163,7 +163,8 @@ def _init(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs): # cast data from pandas elif isinstance(data, pd.DataFrame) or isinstance(data, pd.Series): - _data = format_data(data.copy(), index=index, **kwargs) + _data = data if fast else data.copy() + _data = format_data(_data, index=index, fast=fast, **kwargs) # unsupported `data` args elif islistable(data): diff --git a/pyam/utils.py b/pyam/utils.py index d1f9de068..0f4ce57db 100644 --- a/pyam/utils.py +++ b/pyam/utils.py @@ -181,8 +181,64 @@ def read_file(path, *args, **kwargs): return format_data(read_pandas(path, *args, **kwargs), **format_kwargs) -def format_data(df, index, **kwargs): +def intuit_column_groups(df, index): + cols = [c for c in df.columns if c not in index + REQUIRED_COLS] + year_cols, time_cols, extra_cols = [], [], [] + for i in cols: + # if the column name can be cast to integer, assume it's a year column + try: + int(i) + year_cols.append(i) + + # otherwise, try casting to datetime + except (ValueError, TypeError): + try: + dateutil.parser.parse(str(i)) + time_cols.append(i) + + # neither year nor datetime, so it is an extra-column + except ValueError: + extra_cols.append(i) + if year_cols and not time_cols: + time_col = "year" + melt_cols = sorted(year_cols) + else: + time_col = "time" + melt_cols = sorted(year_cols) + sorted(time_cols) + if not melt_cols: + raise ValueError("Missing time domain") + return extra_cols, time_col, melt_cols + +def fast_format_data(df, index=DEFAULT_META_INDEX): + if not isinstance(df, pd.DataFrame): + raise TypeError('Fast format only works if provided a pd.DataFrame') + col_diff = set(IAMC_IDX) - set(df.columns) + if col_diff: + raise ValueError(f'Missing required columns: {col_diff}') + + if "value" not in df.columns: + extra_cols, time_col, melt_cols = intuit_column_groups(df, index) + df = pd.melt( + df, + id_vars=index + REQUIRED_COLS + extra_cols, + var_name=time_col, + value_vars=melt_cols, + value_name="value", + ) + + # cast to pd.Series, check for duplicates + idx_cols = index + 
REQUIRED_COLS + [time_col] + extra_cols + df.set_index(idx_cols, inplace=True) + df = df.value + + df.sort_index(inplace=True) + return df, index, time_col, extra_cols + +def format_data(df, index, fast=False, **kwargs): """Convert a pandas.Dataframe or pandas.Series to the required format""" + if fast: + return fast_format_data(df, index) + if isinstance(df, pd.Series): df.name = df.name or "value" df = df.to_frame() diff --git a/tests/profile_init.py b/tests/profile_init.py new file mode 100644 index 000000000..34ad3973c --- /dev/null +++ b/tests/profile_init.py @@ -0,0 +1,64 @@ +import string +import numpy as np +import pandas as pd +from functools import wraps +import time + +import pyam + +YEARS = range(2010, 2101, 10) + + + +def timeit(func): + @wraps(func) + def timeit_wrapper(*args, **kwargs): + start_time = time.perf_counter() + result = func(*args, **kwargs) + end_time = time.perf_counter() + total_time = end_time - start_time + return total_time, result + return timeit_wrapper + +def join(a): + return ''.join(a) + +def gen_str(N, k=1): + return np.random.choice(list(string.ascii_lowercase), size=(k, N,len(pyam.IAMC_IDX))) + +def gen_str_iamc(N, k=1): + return np.apply_along_axis(join, 0, gen_str(N, k)) + +def gen_float(N, years=YEARS): + return np.random.choice(range(10), size=(N,len(years), )) + +@timeit +def gen_frame(strdata, fdata, fast): + return pyam.IamDataFrame(pd.concat([strdata, fdata], axis=1), fast=fast) + +def profile(max=5): + data = {'N': [], 'time': [], 'type': []} + for N in [int(10**n) for n in np.arange(1, 8, step=0.5)]: + print(N) + for type in ['slow', 'fast']: + try: + strdata = pd.DataFrame(gen_str_iamc(N, k=5), columns=pyam.IAMC_IDX) + fdata = pd.DataFrame(gen_float(N), columns=YEARS) + time, df = gen_frame(strdata, fdata, fast=type == 'fast') + print(N, type, time) + data['N'].append(N) + data['type'].append(type) + data['time'].append(time) + except: + continue + return pd.DataFrame.from_dict(data) + +if __name__ == '__main__': + import matplotlib.pyplot as plt + import seaborn as sns + df = profile(max=8) + fig, ax = plt.subplots() + sns.lineplot(data=df, x='N', y='time', hue='type', ax=ax) + ax.set(xscale='log') + fig.savefig('profile_init.png') + df.to_csv('profile_init.csv') \ No newline at end of file diff --git a/tests/test_core.py b/tests/test_core.py index aa0064cc5..1a3eec4ce 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -72,6 +72,10 @@ def test_init_from_iamdf(test_df_year): assert df.scenario == ["scen_b", "scen_bar"] assert test_df_year.scenario == ["scen_b", "scen_foo"] +def test_init_fast(test_df_year): + obs = IamDataFrame(test_df_year, fast=True) + exp = IamDataFrame(test_df_year) + assert_iamframe_equal(obs, exp) def test_init_from_iamdf_raises(test_df_year): # casting an IamDataFrame instance again with extra args fails From 2dfc141a409937714b06b21dddc7ccc0b76f8c03 Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Fri, 17 Feb 2023 10:03:57 +0100 Subject: [PATCH 02/12] added fast to filereading with tests and profiling --- pyam/core.py | 2 +- pyam/utils.py | 42 +++++++++++++++++++++++++++++------------- tests/profile_init.py | 36 +++++++++++++++++++++++++++++------- tests/test_io.py | 7 ++++--- 4 files changed, 63 insertions(+), 24 deletions(-) diff --git a/pyam/core.py b/pyam/core.py index d46ae89ee..231166854 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -159,7 +159,7 @@ def _init(self, data, meta=None, index=DEFAULT_META_INDEX, fast=False, **kwargs) if not data.is_file(): raise FileNotFoundError(f"No such file: 
'{data}'") logger.info(f"Reading file {data}") - _data = read_file(data, index=index, **kwargs) + _data = read_file(data, index=index, fast=fast, **kwargs) # cast data from pandas elif isinstance(data, pd.DataFrame) or isinstance(data, pd.Series): diff --git a/pyam/utils.py b/pyam/utils.py index 0f4ce57db..e1894c861 100644 --- a/pyam/utils.py +++ b/pyam/utils.py @@ -169,16 +169,18 @@ def is_empty(name, s): return False empty_cols = [c for c in df.columns if is_empty(c, df[c])] - return df.drop(columns=empty_cols).dropna(axis=0, how="all") + df.drop(columns=empty_cols, inplace=True) + df.dropna(axis=0, how="all", inplace=True) + return df -def read_file(path, *args, **kwargs): +def read_file(path, fast=False, *args, **kwargs): """Read data from a file""" # extract kwargs that are intended for `format_data` format_kwargs = dict(index=kwargs.pop("index")) for c in [i for i in IAMC_IDX + ["year", "time", "value"] if i in kwargs]: format_kwargs[c] = kwargs.pop(c) - return format_data(read_pandas(path, *args, **kwargs), **format_kwargs) + return format_data(read_pandas(path, *args, **kwargs), fast=fast, **format_kwargs) def intuit_column_groups(df, index): @@ -212,27 +214,41 @@ def intuit_column_groups(df, index): def fast_format_data(df, index=DEFAULT_META_INDEX): if not isinstance(df, pd.DataFrame): raise TypeError('Fast format only works if provided a pd.DataFrame') + + # all lower case + str_cols = [c for c in df.columns if isstr(c)] + df.rename(columns={c: str(c).lower() for c in str_cols}, inplace=True) + + if "notes" in df.columns: # this came from the database + logger.info("Ignoring notes column in dataframe") + df.drop(columns="notes", inplace=True) + col = df.columns[0] # first column has database copyright notice + df = df[~df[col].str.contains("database", case=False)] + col_diff = set(IAMC_IDX) - set(df.columns) if col_diff: raise ValueError(f'Missing required columns: {col_diff}') - - if "value" not in df.columns: - extra_cols, time_col, melt_cols = intuit_column_groups(df, index) + + extra_cols, time_col, melt_cols = intuit_column_groups(df, index) + # build idx in expected order with IAMC_IDX first + idx = IAMC_IDX + list(set(index + extra_cols)- set(IAMC_IDX)) + if "value" not in df.columns: df = pd.melt( df, - id_vars=index + REQUIRED_COLS + extra_cols, + id_vars=idx, var_name=time_col, value_vars=melt_cols, value_name="value", ) - # cast to pd.Series, check for duplicates - idx_cols = index + REQUIRED_COLS + [time_col] + extra_cols - df.set_index(idx_cols, inplace=True) - df = df.value + df.dropna(inplace=True, subset=["value"]) + df.loc[df.unit.isnull(), "unit"] = "" - df.sort_index(inplace=True) - return df, index, time_col, extra_cols + # cast to pd.Series and return + idx_cols = idx + [time_col] + df.set_index(idx_cols, inplace=True) + df.sort_index(inplace=True) # TODO: not sure this is needed + return df.value, index, time_col, extra_cols def format_data(df, index, fast=False, **kwargs): """Convert a pandas.Dataframe or pandas.Series to the required format""" diff --git a/tests/profile_init.py b/tests/profile_init.py index 34ad3973c..dc5d15482 100644 --- a/tests/profile_init.py +++ b/tests/profile_init.py @@ -2,6 +2,7 @@ import numpy as np import pandas as pd from functools import wraps +from pathlib import Path import time import pyam @@ -33,18 +34,19 @@ def gen_float(N, years=YEARS): return np.random.choice(range(10), size=(N,len(years), )) @timeit -def gen_frame(strdata, fdata, fast): - return pyam.IamDataFrame(pd.concat([strdata, fdata], axis=1), fast=fast) +def 
gen_frame(data, fast): + return pyam.IamDataFrame(data, fast=fast) def profile(max=5): data = {'N': [], 'time': [], 'type': []} - for N in [int(10**n) for n in np.arange(1, 8, step=0.5)]: + for N in [int(10**n) for n in np.arange(1, max, step=0.5)]: print(N) for type in ['slow', 'fast']: try: strdata = pd.DataFrame(gen_str_iamc(N, k=5), columns=pyam.IAMC_IDX) fdata = pd.DataFrame(gen_float(N), columns=YEARS) - time, df = gen_frame(strdata, fdata, fast=type == 'fast') + _data = pd.concat([strdata, fdata], axis=1) + time, df = gen_frame(_data, fast=type == 'fast') print(N, type, time) data['N'].append(N) data['type'].append(type) @@ -53,12 +55,32 @@ def profile(max=5): continue return pd.DataFrame.from_dict(data) -if __name__ == '__main__': +@timeit +def gen_frame_from_file(file, fast): + return pyam.IamDataFrame(file, fast=fast) + +def profile_file(fname): + data = {'N': [], 'time': [], 'type': []} + for type in ['slow', 'fast']: + time, df = gen_frame_from_file(fname, fast=type == 'fast') + data['N'].append(len(df)) + data['type'].append(type) + data['time'].append(time) + return pd.DataFrame.from_dict(data) + +def main(): import matplotlib.pyplot as plt import seaborn as sns - df = profile(max=8) + dfp = profile(max=6) + df6 = profile_file(fname=Path('./AR6_Scenarios_Database_World_v1.0.csv')) + df = pd.concat([dfp, df6]).reset_index() + df.to_csv('profile_init.csv') + print(df) fig, ax = plt.subplots() sns.lineplot(data=df, x='N', y='time', hue='type', ax=ax) ax.set(xscale='log') fig.savefig('profile_init.png') - df.to_csv('profile_init.csv') \ No newline at end of file + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/tests/test_io.py b/tests/test_io.py index 56add9217..28174d7cc 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -225,15 +225,16 @@ def test_load_meta_empty(test_pd_df): exp = IamDataFrame(test_pd_df) assert_iamframe_equal(obs, exp) - -def test_load_ssp_database_downloaded_file(test_pd_df): +@pytest.mark.parametrize("fast", [True, False]) +def test_load_ssp_database_downloaded_file(test_pd_df, fast): exp = IamDataFrame(test_pd_df).filter(**FILTER_ARGS).as_pandas() file = TEST_DATA_DIR / "test_SSP_database_raw_download.xlsx" - obs_df = IamDataFrame(file) + obs_df = IamDataFrame(file, fast=fast) pd.testing.assert_frame_equal(obs_df.as_pandas(), exp) def test_load_rcp_database_downloaded_file(test_pd_df): + # RCP data not tested for fast at present because it requires additional processing exp = IamDataFrame(test_pd_df).filter(**FILTER_ARGS).as_pandas() file = TEST_DATA_DIR / "test_RCP_database_raw_download.xlsx" obs_df = IamDataFrame(file) From 6a58ab2ee6f14c92497c3cc89a8fad43ce95a361 Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Fri, 17 Feb 2023 10:13:42 +0100 Subject: [PATCH 03/12] put ar6 data location with other files --- tests/profile_init.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/profile_init.py b/tests/profile_init.py index dc5d15482..4c299c523 100644 --- a/tests/profile_init.py +++ b/tests/profile_init.py @@ -69,10 +69,11 @@ def profile_file(fname): return pd.DataFrame.from_dict(data) def main(): + # requires downloading AR6 dataset and placing it in the data folder import matplotlib.pyplot as plt import seaborn as sns dfp = profile(max=6) - df6 = profile_file(fname=Path('./AR6_Scenarios_Database_World_v1.0.csv')) + df6 = profile_file(fname=Path('./data/AR6_Scenarios_Database_World_v1.0.csv')) df = pd.concat([dfp, df6]).reset_index() df.to_csv('profile_init.csv') print(df) From 
7d5a5968302d44e274eef556dd778544d3db2f20 Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Fri, 17 Feb 2023 10:21:45 +0100 Subject: [PATCH 04/12] blacked files --- pyam/utils.py | 21 ++++++++------ tests/profile_init.py | 66 +++++++++++++++++++++++++++---------------- tests/test_core.py | 2 ++ tests/test_io.py | 1 + 4 files changed, 57 insertions(+), 33 deletions(-) diff --git a/pyam/utils.py b/pyam/utils.py index e1894c861..89732970d 100644 --- a/pyam/utils.py +++ b/pyam/utils.py @@ -211,28 +211,29 @@ def intuit_column_groups(df, index): raise ValueError("Missing time domain") return extra_cols, time_col, melt_cols + def fast_format_data(df, index=DEFAULT_META_INDEX): if not isinstance(df, pd.DataFrame): - raise TypeError('Fast format only works if provided a pd.DataFrame') + raise TypeError("Fast format only works if provided a pd.DataFrame") # all lower case str_cols = [c for c in df.columns if isstr(c)] df.rename(columns={c: str(c).lower() for c in str_cols}, inplace=True) - + if "notes" in df.columns: # this came from the database logger.info("Ignoring notes column in dataframe") df.drop(columns="notes", inplace=True) col = df.columns[0] # first column has database copyright notice df = df[~df[col].str.contains("database", case=False)] - + col_diff = set(IAMC_IDX) - set(df.columns) if col_diff: - raise ValueError(f'Missing required columns: {col_diff}') - + raise ValueError(f"Missing required columns: {col_diff}") + extra_cols, time_col, melt_cols = intuit_column_groups(df, index) # build idx in expected order with IAMC_IDX first - idx = IAMC_IDX + list(set(index + extra_cols)- set(IAMC_IDX)) - if "value" not in df.columns: + idx = IAMC_IDX + list(set(index + extra_cols) - set(IAMC_IDX)) + if "value" not in df.columns: df = pd.melt( df, id_vars=idx, @@ -247,9 +248,10 @@ def fast_format_data(df, index=DEFAULT_META_INDEX): # cast to pd.Series and return idx_cols = idx + [time_col] df.set_index(idx_cols, inplace=True) - df.sort_index(inplace=True) # TODO: not sure this is needed + # df.sort_index(inplace=True) # TODO: not sure this is needed return df.value, index, time_col, extra_cols + def format_data(df, index, fast=False, **kwargs): """Convert a pandas.Dataframe or pandas.Series to the required format""" if fast: @@ -440,7 +442,8 @@ def convert_r_columns(c): if df.empty: logger.warning("Formatted data is empty!") - return df.sort_index(), index, time_col, extra_cols + # return df.sort_index(), index, time_col, extra_cols + return df, index, time_col, extra_cols def sort_data(data, cols): diff --git a/tests/profile_init.py b/tests/profile_init.py index 4c299c523..183a8fc61 100644 --- a/tests/profile_init.py +++ b/tests/profile_init.py @@ -5,12 +5,11 @@ from pathlib import Path import time -import pyam +import pyam YEARS = range(2010, 2101, 10) - def timeit(func): @wraps(func) def timeit_wrapper(*args, **kwargs): @@ -19,69 +18,88 @@ def timeit_wrapper(*args, **kwargs): end_time = time.perf_counter() total_time = end_time - start_time return total_time, result + return timeit_wrapper + def join(a): - return ''.join(a) + return "".join(a) + def gen_str(N, k=1): - return np.random.choice(list(string.ascii_lowercase), size=(k, N,len(pyam.IAMC_IDX))) + return np.random.choice( + list(string.ascii_lowercase), size=(k, N, len(pyam.IAMC_IDX)) + ) + def gen_str_iamc(N, k=1): return np.apply_along_axis(join, 0, gen_str(N, k)) + def gen_float(N, years=YEARS): - return np.random.choice(range(10), size=(N,len(years), )) + return np.random.choice( + range(10), + size=( + N, + len(years), + ), + ) 
+ @timeit def gen_frame(data, fast): return pyam.IamDataFrame(data, fast=fast) + def profile(max=5): - data = {'N': [], 'time': [], 'type': []} + data = {"N": [], "time": [], "type": []} for N in [int(10**n) for n in np.arange(1, max, step=0.5)]: print(N) - for type in ['slow', 'fast']: + for type in ["slow", "fast"]: try: strdata = pd.DataFrame(gen_str_iamc(N, k=5), columns=pyam.IAMC_IDX) fdata = pd.DataFrame(gen_float(N), columns=YEARS) _data = pd.concat([strdata, fdata], axis=1) - time, df = gen_frame(_data, fast=type == 'fast') + time, df = gen_frame(_data, fast=type == "fast") print(N, type, time) - data['N'].append(N) - data['type'].append(type) - data['time'].append(time) + data["N"].append(N) + data["type"].append(type) + data["time"].append(time) except: continue return pd.DataFrame.from_dict(data) + @timeit def gen_frame_from_file(file, fast): return pyam.IamDataFrame(file, fast=fast) + def profile_file(fname): - data = {'N': [], 'time': [], 'type': []} - for type in ['slow', 'fast']: - time, df = gen_frame_from_file(fname, fast=type == 'fast') - data['N'].append(len(df)) - data['type'].append(type) - data['time'].append(time) + data = {"N": [], "time": [], "type": []} + for type in ["slow", "fast"]: + time, df = gen_frame_from_file(fname, fast=type == "fast") + data["N"].append(len(df)) + data["type"].append(type) + data["time"].append(time) return pd.DataFrame.from_dict(data) + def main(): # requires downloading AR6 dataset and placing it in the data folder import matplotlib.pyplot as plt import seaborn as sns + dfp = profile(max=6) - df6 = profile_file(fname=Path('./data/AR6_Scenarios_Database_World_v1.0.csv')) + df6 = profile_file(fname=Path("./data/AR6_Scenarios_Database_World_v1.0.csv")) df = pd.concat([dfp, df6]).reset_index() - df.to_csv('profile_init.csv') + df.to_csv("profile_init.csv") print(df) fig, ax = plt.subplots() - sns.lineplot(data=df, x='N', y='time', hue='type', ax=ax) - ax.set(xscale='log') - fig.savefig('profile_init.png') + sns.lineplot(data=df, x="N", y="time", hue="type", ax=ax) + ax.set(xscale="log") + fig.savefig("profile_init.png") -if __name__ == '__main__': - main() \ No newline at end of file +if __name__ == "__main__": + main() diff --git a/tests/test_core.py b/tests/test_core.py index 1a3eec4ce..62e9ade97 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -72,11 +72,13 @@ def test_init_from_iamdf(test_df_year): assert df.scenario == ["scen_b", "scen_bar"] assert test_df_year.scenario == ["scen_b", "scen_foo"] + def test_init_fast(test_df_year): obs = IamDataFrame(test_df_year, fast=True) exp = IamDataFrame(test_df_year) assert_iamframe_equal(obs, exp) + def test_init_from_iamdf_raises(test_df_year): # casting an IamDataFrame instance again with extra args fails match = "Invalid arguments for initializing from IamDataFrame: {'model': 'foo'}" diff --git a/tests/test_io.py b/tests/test_io.py index 28174d7cc..94a384a10 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -225,6 +225,7 @@ def test_load_meta_empty(test_pd_df): exp = IamDataFrame(test_pd_df) assert_iamframe_equal(obs, exp) + @pytest.mark.parametrize("fast", [True, False]) def test_load_ssp_database_downloaded_file(test_pd_df, fast): exp = IamDataFrame(test_pd_df).filter(**FILTER_ARGS).as_pandas() From 45f31613d28e1649d26ba84fbabb92d1c18a980e Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Fri, 17 Feb 2023 10:27:02 +0100 Subject: [PATCH 05/12] uncomment sorts --- pyam/utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pyam/utils.py 
b/pyam/utils.py index 89732970d..1db1853c1 100644 --- a/pyam/utils.py +++ b/pyam/utils.py @@ -248,7 +248,7 @@ def fast_format_data(df, index=DEFAULT_META_INDEX): # cast to pd.Series and return idx_cols = idx + [time_col] df.set_index(idx_cols, inplace=True) - # df.sort_index(inplace=True) # TODO: not sure this is needed + df.sort_index(inplace=True) # TODO: not sure this is needed return df.value, index, time_col, extra_cols @@ -442,8 +442,7 @@ def convert_r_columns(c): if df.empty: logger.warning("Formatted data is empty!") - # return df.sort_index(), index, time_col, extra_cols - return df, index, time_col, extra_cols + return df.sort_index(), index, time_col, extra_cols def sort_data(data, cols): From 7046b86392deabfde6aa52fe4c7791448824db27 Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Fri, 17 Feb 2023 10:37:19 +0100 Subject: [PATCH 06/12] moved profile_init to profile module --- {tests => profile}/profile_init.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {tests => profile}/profile_init.py (100%) diff --git a/tests/profile_init.py b/profile/profile_init.py similarity index 100% rename from tests/profile_init.py rename to profile/profile_init.py From 873ee463889c84a88fd44a316cef3f73de7996a1 Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Sat, 18 Feb 2023 12:22:47 +0100 Subject: [PATCH 07/12] refactor fast format to support series and basic dataframes. also supports file reading --- pyam/core.py | 9 ++-- pyam/utils.py | 121 +++++++++++++++++++++++++++++--------------------- 2 files changed, 76 insertions(+), 54 deletions(-) diff --git a/pyam/core.py b/pyam/core.py index 231166854..a016627de 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -28,6 +28,7 @@ write_sheet, read_file, read_pandas, + fast_format_data, format_data, merge_meta, find_depth, @@ -162,9 +163,11 @@ def _init(self, data, meta=None, index=DEFAULT_META_INDEX, fast=False, **kwargs) _data = read_file(data, index=index, fast=fast, **kwargs) # cast data from pandas - elif isinstance(data, pd.DataFrame) or isinstance(data, pd.Series): - _data = data if fast else data.copy() - _data = format_data(_data, index=index, fast=fast, **kwargs) + elif isinstance(data, (pd.DataFrame, pd.Series)): + if fast: + _data = fast_format_data(data, index=index, **kwargs) + else: + _data = format_data(data.copy(), index=index, **kwargs) # unsupported `data` args elif islistable(data): diff --git a/pyam/utils.py b/pyam/utils.py index 1db1853c1..9b48e499a 100644 --- a/pyam/utils.py +++ b/pyam/utils.py @@ -180,10 +180,32 @@ def read_file(path, fast=False, *args, **kwargs): format_kwargs = dict(index=kwargs.pop("index")) for c in [i for i in IAMC_IDX + ["year", "time", "value"] if i in kwargs]: format_kwargs[c] = kwargs.pop(c) - return format_data(read_pandas(path, *args, **kwargs), fast=fast, **format_kwargs) + data = read_pandas(path, *args, **kwargs) + if fast: + # determine non-data columns + extra_cols, time_col, data_cols = intuit_column_groups(data) + # format columns for fast reading + data = data.rename(columns={c: str(c).lower() for c in extra_cols}) + extra_cols = [str(c).lower() for c in extra_cols] + for c in format_kwargs['index']: + extra_cols.remove(c) + # support databases + if 'notes' in data.columns: + data = format_from_database(data) + extra_cols.remove('notes') + # force integer year columns + if time_col == 'year': + data = data.rename(columns={c: int(c) for c in data_cols}) + # support file data in long format + if 'value' in extra_cols: + extra_cols.remove('value') + idx = IAMC_IDX + 
list(set(format_kwargs['index'] + extra_cols) - set(IAMC_IDX)) + return fast_format_data(data.set_index(idx), **format_kwargs) + else: + return format_data(data, **format_kwargs) -def intuit_column_groups(df, index): +def intuit_column_groups(df, index=[]): cols = [c for c in df.columns if c not in index + REQUIRED_COLS] year_cols, time_cols, extra_cols = [], [], [] for i in cols: @@ -213,49 +235,56 @@ def intuit_column_groups(df, index): def fast_format_data(df, index=DEFAULT_META_INDEX): - if not isinstance(df, pd.DataFrame): - raise TypeError("Fast format only works if provided a pd.DataFrame") + """A faster formatting funciton with more stringent dataframe requirements - # all lower case - str_cols = [c for c in df.columns if isstr(c)] - df.rename(columns={c: str(c).lower() for c in str_cols}, inplace=True) - - if "notes" in df.columns: # this came from the database - logger.info("Ignoring notes column in dataframe") - df.drop(columns="notes", inplace=True) - col = df.columns[0] # first column has database copyright notice - df = df[~df[col].str.contains("database", case=False)] - - col_diff = set(IAMC_IDX) - set(df.columns) - if col_diff: - raise ValueError(f"Missing required columns: {col_diff}") - - extra_cols, time_col, melt_cols = intuit_column_groups(df, index) - # build idx in expected order with IAMC_IDX first + Requirements: + 1. either a pd.Series or pd.DataFrame with a pyam-compatible MultiIndex + 2. if a pd.DataFrame, all columns as either integer year or datetime + 3. no null values + """ + if not isinstance(df, (pd.DataFrame, pd.Series)): + raise TypeError("Fast format only works if provided a pd.DataFrame or pd.Series") + if set(IAMC_IDX) - set(df.index.names): + raise ValueError( + f"Missing required index levels: {set(IAMC_IDX) - set(df.index.names)}" + ) + + # index in expected order + extra_cols = list(set(df.index.names).difference((set(IAMC_IDX) | set(index)))) + if len(set(['time', 'year']) - set(extra_cols)) == 0: + raise ValueError('Can not have time and year as indicies') idx = IAMC_IDX + list(set(index + extra_cols) - set(IAMC_IDX)) - if "value" not in df.columns: - df = pd.melt( - df, - id_vars=idx, - var_name=time_col, - value_vars=melt_cols, - value_name="value", - ) - - df.dropna(inplace=True, subset=["value"]) - df.loc[df.unit.isnull(), "unit"] = "" - - # cast to pd.Series and return - idx_cols = idx + [time_col] - df.set_index(idx_cols, inplace=True) - df.sort_index(inplace=True) # TODO: not sure this is needed - return df.value, index, time_col, extra_cols + df = df.reorder_levels(idx) + # migrate dataframe to series + if isinstance(df, pd.DataFrame): + _, time_col, _ = intuit_column_groups(df, index=index) + df = df.rename_axis(columns=time_col) + df = df.stack() + else: + time_col = list(set(['time', 'year']) & set(extra_cols))[0] + extra_cols = list(set(extra_cols) - set(['time', 'year'])) + + df.name = 'value' + + return df, index, time_col, extra_cols + +def format_from_database(df): + logger.info("Ignoring notes column in dataframe") + df.drop(columns="notes", inplace=True) + col = df.columns[0] # first column has database copyright notice + df = df[~df[col].str.contains("database", case=False)] + if "scenario" in df.columns and "model" not in df.columns: + # model and scenario are jammed together in RCP data + scen = df["scenario"] + df.loc[:, "model"] = scen.apply(lambda s: s.split("-")[0].strip()) + df.loc[:, "scenario"] = scen.apply( + lambda s: "-".join(s.split("-")[1:]).strip() + ) + return df -def format_data(df, index, fast=False, 
**kwargs): +def format_data(df, index, **kwargs): """Convert a pandas.Dataframe or pandas.Series to the required format""" - if fast: - return fast_format_data(df, index) if isinstance(df, pd.Series): df.name = df.name or "value" @@ -315,17 +344,7 @@ def convert_r_columns(c): df.rename(columns={c: str(c).lower() for c in str_cols}, inplace=True) if "notes" in df.columns: # this came from the database - logger.info("Ignoring notes column in dataframe") - df.drop(columns="notes", inplace=True) - col = df.columns[0] # first column has database copyright notice - df = df[~df[col].str.contains("database", case=False)] - if "scenario" in df.columns and "model" not in df.columns: - # model and scenario are jammed together in RCP data - scen = df["scenario"] - df.loc[:, "model"] = scen.apply(lambda s: s.split("-")[0].strip()) - df.loc[:, "scenario"] = scen.apply( - lambda s: "-".join(s.split("-")[1:]).strip() - ) + df = format_from_database(df) # reset the index if meaningful entries are included there if not list(df.index.names) == [None]: From fd081ee577541104dda0ca08375aeedd02c90233 Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Sun, 19 Feb 2023 13:30:37 +0100 Subject: [PATCH 08/12] blacked --- pyam/utils.py | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/pyam/utils.py b/pyam/utils.py index 9b48e499a..17a0dfe0b 100644 --- a/pyam/utils.py +++ b/pyam/utils.py @@ -187,19 +187,19 @@ def read_file(path, fast=False, *args, **kwargs): # format columns for fast reading data = data.rename(columns={c: str(c).lower() for c in extra_cols}) extra_cols = [str(c).lower() for c in extra_cols] - for c in format_kwargs['index']: + for c in format_kwargs["index"]: extra_cols.remove(c) # support databases - if 'notes' in data.columns: + if "notes" in data.columns: data = format_from_database(data) - extra_cols.remove('notes') + extra_cols.remove("notes") # force integer year columns - if time_col == 'year': - data = data.rename(columns={c: int(c) for c in data_cols}) + if time_col == "year": + data = data.rename(columns={c: int(c) for c in data_cols}) # support file data in long format - if 'value' in extra_cols: - extra_cols.remove('value') - idx = IAMC_IDX + list(set(format_kwargs['index'] + extra_cols) - set(IAMC_IDX)) + if "value" in extra_cols: + extra_cols.remove("value") + idx = IAMC_IDX + list(set(format_kwargs["index"] + extra_cols) - set(IAMC_IDX)) return fast_format_data(data.set_index(idx), **format_kwargs) else: return format_data(data, **format_kwargs) @@ -239,20 +239,22 @@ def fast_format_data(df, index=DEFAULT_META_INDEX): Requirements: 1. either a pd.Series or pd.DataFrame with a pyam-compatible MultiIndex - 2. if a pd.DataFrame, all columns as either integer year or datetime + 2. if a pd.DataFrame, all columns as either integer year or datetime 3. 
no null values """ if not isinstance(df, (pd.DataFrame, pd.Series)): - raise TypeError("Fast format only works if provided a pd.DataFrame or pd.Series") + raise TypeError( + "Fast format only works if provided a pd.DataFrame or pd.Series" + ) if set(IAMC_IDX) - set(df.index.names): raise ValueError( f"Missing required index levels: {set(IAMC_IDX) - set(df.index.names)}" - ) - + ) + # index in expected order extra_cols = list(set(df.index.names).difference((set(IAMC_IDX) | set(index)))) - if len(set(['time', 'year']) - set(extra_cols)) == 0: - raise ValueError('Can not have time and year as indicies') + if len(set(["time", "year"]) - set(extra_cols)) == 0: + raise ValueError("Can not have time and year as indicies") idx = IAMC_IDX + list(set(index + extra_cols) - set(IAMC_IDX)) df = df.reorder_levels(idx) @@ -262,13 +264,14 @@ def fast_format_data(df, index=DEFAULT_META_INDEX): df = df.rename_axis(columns=time_col) df = df.stack() else: - time_col = list(set(['time', 'year']) & set(extra_cols))[0] - extra_cols = list(set(extra_cols) - set(['time', 'year'])) + time_col = list(set(["time", "year"]) & set(extra_cols))[0] + extra_cols = list(set(extra_cols) - set(["time", "year"])) - df.name = 'value' + df.name = "value" return df, index, time_col, extra_cols + def format_from_database(df): logger.info("Ignoring notes column in dataframe") df.drop(columns="notes", inplace=True) @@ -278,11 +281,10 @@ def format_from_database(df): # model and scenario are jammed together in RCP data scen = df["scenario"] df.loc[:, "model"] = scen.apply(lambda s: s.split("-")[0].strip()) - df.loc[:, "scenario"] = scen.apply( - lambda s: "-".join(s.split("-")[1:]).strip() - ) + df.loc[:, "scenario"] = scen.apply(lambda s: "-".join(s.split("-")[1:]).strip()) return df + def format_data(df, index, **kwargs): """Convert a pandas.Dataframe or pandas.Series to the required format""" From 2b06af78162f1c8dbd660bf98f5ec505a3a7b722 Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Sun, 19 Feb 2023 14:38:52 +0100 Subject: [PATCH 09/12] update profiling for new structure --- profile/profile_init.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/profile/profile_init.py b/profile/profile_init.py index 183a8fc61..9cd6df864 100644 --- a/profile/profile_init.py +++ b/profile/profile_init.py @@ -52,7 +52,7 @@ def gen_frame(data, fast): def profile(max=5): - data = {"N": [], "time": [], "type": []} + data = {"N": [], "time": [], "type": [], "label": []} for N in [int(10**n) for n in np.arange(1, max, step=0.5)]: print(N) for type in ["slow", "fast"]: @@ -60,11 +60,15 @@ def profile(max=5): strdata = pd.DataFrame(gen_str_iamc(N, k=5), columns=pyam.IAMC_IDX) fdata = pd.DataFrame(gen_float(N), columns=YEARS) _data = pd.concat([strdata, fdata], axis=1) - time, df = gen_frame(_data, fast=type == "fast") + fast = type == "fast" + if fast: + _data = _data.set_index(pyam.IAMC_IDX) + time, df = gen_frame(_data, fast=fast) print(N, type, time) data["N"].append(N) data["type"].append(type) data["time"].append(time) + data["label"].append("autogenerated") except: continue return pd.DataFrame.from_dict(data) @@ -76,12 +80,13 @@ def gen_frame_from_file(file, fast): def profile_file(fname): - data = {"N": [], "time": [], "type": []} + data = {"N": [], "time": [], "type": [], "label": []} for type in ["slow", "fast"]: time, df = gen_frame_from_file(fname, fast=type == "fast") data["N"].append(len(df)) data["type"].append(type) data["time"].append(time) + data["label"].append("from file") return 
pd.DataFrame.from_dict(data) @@ -90,7 +95,7 @@ def main(): import matplotlib.pyplot as plt import seaborn as sns - dfp = profile(max=6) + dfp = profile(max=7) df6 = profile_file(fname=Path("./data/AR6_Scenarios_Database_World_v1.0.csv")) df = pd.concat([dfp, df6]).reset_index() df.to_csv("profile_init.csv") From 58c9822e8e4c2aa71b42e14cc091fe28973a019b Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Mon, 20 Feb 2023 08:38:56 +0100 Subject: [PATCH 10/12] refactor for using stack instead of melt --- profile/profile_init.py | 4 +-- pyam/utils.py | 59 ++++++++++++++++++++++++----------------- 2 files changed, 36 insertions(+), 27 deletions(-) diff --git a/profile/profile_init.py b/profile/profile_init.py index 9cd6df864..5741a4ecf 100644 --- a/profile/profile_init.py +++ b/profile/profile_init.py @@ -48,6 +48,8 @@ def gen_float(N, years=YEARS): @timeit def gen_frame(data, fast): + if fast: + data = data.set_index(pyam.IAMC_IDX) return pyam.IamDataFrame(data, fast=fast) @@ -61,8 +63,6 @@ def profile(max=5): fdata = pd.DataFrame(gen_float(N), columns=YEARS) _data = pd.concat([strdata, fdata], axis=1) fast = type == "fast" - if fast: - _data = _data.set_index(pyam.IAMC_IDX) time, df = gen_frame(_data, fast=fast) print(N, type, time) data["N"].append(N) diff --git a/pyam/utils.py b/pyam/utils.py index 17a0dfe0b..6ed6b74a1 100644 --- a/pyam/utils.py +++ b/pyam/utils.py @@ -369,6 +369,9 @@ def convert_r_columns(c): if missing_required_col: raise ValueError(f"Missing required columns: {missing_required_col}") + # replace missing units by an empty string for user-friendly filtering + df.loc[df.unit.isnull(), "unit"] = "" + # check whether data in wide format (IAMC) or long format (`value` column) if "value" in df.columns: # check if time column is given as `year` (int) or `time` (datetime) @@ -383,6 +386,7 @@ def convert_r_columns(c): for c in df.columns if c not in index + REQUIRED_COLS + [time_col, "value"] ] + wide = False else: # if in wide format, check if columns are years (int) or datetime cols = [c for c in df.columns if c not in index + REQUIRED_COLS] @@ -411,19 +415,40 @@ def convert_r_columns(c): melt_cols = sorted(year_cols) + sorted(time_cols) if not melt_cols: raise ValueError("Missing time domain") + wide = True + + # verify that there are no nan's left (in columns), and transform data + idx = index + REQUIRED_COLS + extra_cols + null_rows = df[idx].isnull().T.any() + if null_rows.any(): + _df = df[idx] + cols = ", ".join(_df.columns[_df.isnull().any().values]) + raise_data_error( + f"Empty cells in `data` (columns: '{cols}')", _df.loc[null_rows] + ) + del null_rows - # melt the dataframe - df = pd.melt( - df, - id_vars=index + REQUIRED_COLS + extra_cols, - var_name=time_col, - value_vars=melt_cols, - value_name="value", + if wide: + df = ( + df + .set_index(idx) + [melt_cols] + .rename_axis(columns=time_col) + .stack() + ) + df.name = "value" + else: + df = ( + df + .set_index(idx + [time_col]) + ['value'] ) # cast value column to numeric and drop nan + print('foo', type(df)) + print(df) try: - df["value"] = pd.to_numeric(df["value"]) + df = pd.to_numeric(df) except ValueError as e: # get the row number where the error happened row_nr_regex = re.compile(r"(?<=at position )\d+") @@ -432,23 +457,7 @@ def convert_r_columns(c): short_error = short_error_regex.search(str(e)).group() raise_data_error(f"{short_error} in `data`", df.iloc[[row_nr]]) - df.dropna(inplace=True, subset=["value"]) - - # replace missing units by an empty string for user-friendly filtering - 
df.loc[df.unit.isnull(), "unit"] = "" - - # verify that there are no nan's left (in columns) - null_rows = df.isnull().T.any() - if null_rows.any(): - cols = ", ".join(df.columns[df.isnull().any().values]) - raise_data_error( - f"Empty cells in `data` (columns: '{cols}')", df.loc[null_rows] - ) - del null_rows - - # cast to pd.Series, check for duplicates - idx_cols = index + REQUIRED_COLS + [time_col] + extra_cols - df = df.set_index(idx_cols).value + df = df.dropna() # format the time-column _time = [to_time(i) for i in get_index_levels(df.index, time_col)] From 22a6fa5eaba32d3f0a76245f0ca43c191313921c Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Mon, 20 Feb 2023 09:06:29 +0100 Subject: [PATCH 11/12] bump workflow to do mpl tests on 3.8 since something upstream broke defaults --- .github/workflows/pytest.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index f49c5c051..9da1d7259 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -41,16 +41,16 @@ jobs: run: pip install .[tests,optional_plotting,optional_io_formats,tutorials] - name: Test with pytest - if: ${{ matrix.python-version != '3.9' }} + if: ${{ matrix.python-version != '3.8' }} run: pytest tests - # only execute Matplotlib tests on latest Python version + # only execute Matplotlib tests on a known stable Python + deps version - name: Test with pytest including Matplotlib & Codecov - if: ${{ matrix.python-version == '3.9' }} + if: ${{ matrix.python-version == '3.8' }} run: pytest tests --mpl --cov=./ --cov-report=xml - name: Upload coverage report to Codecov - if: ${{ matrix.os == 'ubuntu-latest' && matrix.python-version == '3.9' }} + if: ${{ matrix.os == 'ubuntu-latest' && matrix.python-version == '3.8' }} uses: codecov/codecov-action@v1 with: file: ./coverage.xml From 80cf8c585ca6eabd5bd8e55be036fc86070531bb Mon Sep 17 00:00:00 2001 From: Matthew Gidden Date: Mon, 20 Feb 2023 14:58:36 +0100 Subject: [PATCH 12/12] errant prints --- pyam/utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pyam/utils.py b/pyam/utils.py index 6ed6b74a1..b47c42fcd 100644 --- a/pyam/utils.py +++ b/pyam/utils.py @@ -445,8 +445,6 @@ def convert_r_columns(c): ) # cast value column to numeric and drop nan - print('foo', type(df)) - print(df) try: df = pd.to_numeric(df) except ValueError as e:
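
A minimal usage sketch of the fast=True path introduced in this series, assuming pyam with these patches applied. The example data, the set_index call, and the equality check via pyam.testing.assert_iamframe_equal are illustrative assumptions based on the requirements documented in fast_format_data (a pyam-compatible MultiIndex, year/datetime columns only, no null values); they are not part of the patches themselves.

    import pandas as pd
    import pyam
    from pyam.testing import assert_iamframe_equal

    # wide-format data: IAMC id columns plus integer-year columns, no NaNs
    data = pd.DataFrame(
        [
            ["model_a", "scen_a", "World", "Primary Energy", "EJ/yr", 1.0, 6.0],
            ["model_a", "scen_a", "World", "Primary Energy|Coal", "EJ/yr", 0.5, 3.0],
        ],
        columns=pyam.IAMC_IDX + [2005, 2010],
    )

    # default path accepts plain IAMC columns ...
    df_slow = pyam.IamDataFrame(data)

    # ... while the fast path expects the id columns as a MultiIndex,
    # leaving only year (or datetime) columns as data columns
    df_fast = pyam.IamDataFrame(data.set_index(pyam.IAMC_IDX), fast=True)

    # both constructors should yield equivalent frames
    assert_iamframe_equal(df_slow, df_fast)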