diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
index f49c5c051..9da1d7259 100644
--- a/.github/workflows/pytest.yml
+++ b/.github/workflows/pytest.yml
@@ -41,16 +41,16 @@ jobs:
       run: pip install .[tests,optional_plotting,optional_io_formats,tutorials]
 
     - name: Test with pytest
-      if: ${{ matrix.python-version != '3.9' }}
+      if: ${{ matrix.python-version != '3.8' }}
       run: pytest tests
 
-    # only execute Matplotlib tests on latest Python version
+    # only execute Matplotlib tests on a known stable Python + deps version
    - name: Test with pytest including Matplotlib & Codecov
-      if: ${{ matrix.python-version == '3.9' }}
+      if: ${{ matrix.python-version == '3.8' }}
       run: pytest tests --mpl --cov=./ --cov-report=xml
 
    - name: Upload coverage report to Codecov
-      if: ${{ matrix.os == 'ubuntu-latest' && matrix.python-version == '3.9' }}
+      if: ${{ matrix.os == 'ubuntu-latest' && matrix.python-version == '3.8' }}
      uses: codecov/codecov-action@v1
      with:
        file: ./coverage.xml
diff --git a/profile/profile_init.py b/profile/profile_init.py
new file mode 100644
index 000000000..5741a4ecf
--- /dev/null
+++ b/profile/profile_init.py
@@ -0,0 +1,110 @@
+import string
+import numpy as np
+import pandas as pd
+from functools import wraps
+from pathlib import Path
+import time
+
+import pyam
+
+YEARS = range(2010, 2101, 10)
+
+
+def timeit(func):
+    @wraps(func)
+    def timeit_wrapper(*args, **kwargs):
+        start_time = time.perf_counter()
+        result = func(*args, **kwargs)
+        end_time = time.perf_counter()
+        total_time = end_time - start_time
+        return total_time, result
+
+    return timeit_wrapper
+
+
+def join(a):
+    return "".join(a)
+
+
+def gen_str(N, k=1):
+    return np.random.choice(
+        list(string.ascii_lowercase), size=(k, N, len(pyam.IAMC_IDX))
+    )
+
+
+def gen_str_iamc(N, k=1):
+    return np.apply_along_axis(join, 0, gen_str(N, k))
+
+
+def gen_float(N, years=YEARS):
+    return np.random.choice(
+        range(10),
+        size=(
+            N,
+            len(years),
+        ),
+    )
+
+
+@timeit
+def gen_frame(data, fast):
+    if fast:
+        data = data.set_index(pyam.IAMC_IDX)
+    return pyam.IamDataFrame(data, fast=fast)
+
+
+def profile(max=5):
+    data = {"N": [], "time": [], "type": [], "label": []}
+    for N in [int(10**n) for n in np.arange(1, max, step=0.5)]:
+        print(N)
+        for type in ["slow", "fast"]:
+            try:
+                strdata = pd.DataFrame(gen_str_iamc(N, k=5), columns=pyam.IAMC_IDX)
+                fdata = pd.DataFrame(gen_float(N), columns=YEARS)
+                _data = pd.concat([strdata, fdata], axis=1)
+                fast = type == "fast"
+                time, df = gen_frame(_data, fast=fast)
+                print(N, type, time)
+                data["N"].append(N)
+                data["type"].append(type)
+                data["time"].append(time)
+                data["label"].append("autogenerated")
+            except Exception:
+                continue
+    return pd.DataFrame.from_dict(data)
+
+
+@timeit
+def gen_frame_from_file(file, fast):
+    return pyam.IamDataFrame(file, fast=fast)
+
+
+def profile_file(fname):
+    data = {"N": [], "time": [], "type": [], "label": []}
+    for type in ["slow", "fast"]:
+        time, df = gen_frame_from_file(fname, fast=type == "fast")
+        data["N"].append(len(df))
+        data["type"].append(type)
+        data["time"].append(time)
+        data["label"].append("from file")
+    return pd.DataFrame.from_dict(data)
+
+
+def main():
+    # requires downloading AR6 dataset and placing it in the data folder
+    import matplotlib.pyplot as plt
+    import seaborn as sns
+
+    dfp = profile(max=7)
+    df6 = profile_file(fname=Path("./data/AR6_Scenarios_Database_World_v1.0.csv"))
+    df = pd.concat([dfp, df6]).reset_index()
+    df.to_csv("profile_init.csv")
+    print(df)
+    fig, ax = plt.subplots()
+    sns.lineplot(data=df, x="N", y="time", hue="type", ax=ax)
+    ax.set(xscale="log")
+    fig.savefig("profile_init.png")
+
+
+if __name__ == "__main__":
+    main()
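The string generation above is terse: gen_str draws single random characters into an array of shape (k, N, len(pyam.IAMC_IDX)), and gen_str_iamc joins along the first axis to produce one random k-character token per IAMC column. A minimal sketch of the expected shapes, reusing gen_str_iamc from the script (the values themselves are random, and the import assumes the script's folder is on the path):

    import pyam
    from profile_init import gen_str_iamc  # assumes profile/ is the working directory

    arr = gen_str_iamc(3, k=5)
    assert arr.shape == (3, len(pyam.IAMC_IDX))  # one string per IAMC column
    assert all(len(s) == 5 for s in arr.ravel())  # each string has k=5 characters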
y="time", hue="type", ax=ax) + ax.set(xscale="log") + fig.savefig("profile_init.png") + + +if __name__ == "__main__": + main() diff --git a/pyam/core.py b/pyam/core.py index 5bd6d0a25..a016627de 100755 --- a/pyam/core.py +++ b/pyam/core.py @@ -28,6 +28,7 @@ write_sheet, read_file, read_pandas, + fast_format_data, format_data, merge_meta, find_depth, @@ -120,7 +121,7 @@ class IamDataFrame(object): for those who are not used to the pandas/Python universe. """ - def __init__(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs): + def __init__(self, data, meta=None, index=DEFAULT_META_INDEX, fast=False, **kwargs): """Initialize an instance of an IamDataFrame""" if isinstance(data, IamDataFrame): if kwargs: @@ -133,9 +134,9 @@ def __init__(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs): for attr, value in data.__dict__.items(): setattr(self, attr, value) else: - self._init(data, meta, index=index, **kwargs) + self._init(data, meta, index=index, fast=fast, **kwargs) - def _init(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs): + def _init(self, data, meta=None, index=DEFAULT_META_INDEX, fast=False, **kwargs): """Process data and set attributes for new instance""" # pop kwarg for meta_sheet_name (prior to reading data from file) @@ -159,11 +160,14 @@ def _init(self, data, meta=None, index=DEFAULT_META_INDEX, **kwargs): if not data.is_file(): raise FileNotFoundError(f"No such file: '{data}'") logger.info(f"Reading file {data}") - _data = read_file(data, index=index, **kwargs) + _data = read_file(data, index=index, fast=fast, **kwargs) # cast data from pandas - elif isinstance(data, pd.DataFrame) or isinstance(data, pd.Series): - _data = format_data(data.copy(), index=index, **kwargs) + elif isinstance(data, (pd.DataFrame, pd.Series)): + if fast: + _data = fast_format_data(data, index=index, **kwargs) + else: + _data = format_data(data.copy(), index=index, **kwargs) # unsupported `data` args elif islistable(data): diff --git a/pyam/utils.py b/pyam/utils.py index d1f9de068..b47c42fcd 100644 --- a/pyam/utils.py +++ b/pyam/utils.py @@ -169,20 +169,125 @@ def is_empty(name, s): return False empty_cols = [c for c in df.columns if is_empty(c, df[c])] - return df.drop(columns=empty_cols).dropna(axis=0, how="all") + df.drop(columns=empty_cols, inplace=True) + df.dropna(axis=0, how="all", inplace=True) + return df -def read_file(path, *args, **kwargs): +def read_file(path, fast=False, *args, **kwargs): """Read data from a file""" # extract kwargs that are intended for `format_data` format_kwargs = dict(index=kwargs.pop("index")) for c in [i for i in IAMC_IDX + ["year", "time", "value"] if i in kwargs]: format_kwargs[c] = kwargs.pop(c) - return format_data(read_pandas(path, *args, **kwargs), **format_kwargs) + data = read_pandas(path, *args, **kwargs) + if fast: + # determine non-data columns + extra_cols, time_col, data_cols = intuit_column_groups(data) + # format columns for fast reading + data = data.rename(columns={c: str(c).lower() for c in extra_cols}) + extra_cols = [str(c).lower() for c in extra_cols] + for c in format_kwargs["index"]: + extra_cols.remove(c) + # support databases + if "notes" in data.columns: + data = format_from_database(data) + extra_cols.remove("notes") + # force integer year columns + if time_col == "year": + data = data.rename(columns={c: int(c) for c in data_cols}) + # support file data in long format + if "value" in extra_cols: + extra_cols.remove("value") + idx = IAMC_IDX + list(set(format_kwargs["index"] + extra_cols) - set(IAMC_IDX)) + return 
diff --git a/pyam/utils.py b/pyam/utils.py
index d1f9de068..b47c42fcd 100644
--- a/pyam/utils.py
+++ b/pyam/utils.py
@@ -169,20 +169,125 @@ def is_empty(name, s):
         return False
 
     empty_cols = [c for c in df.columns if is_empty(c, df[c])]
-    return df.drop(columns=empty_cols).dropna(axis=0, how="all")
+    df.drop(columns=empty_cols, inplace=True)
+    df.dropna(axis=0, how="all", inplace=True)
+    return df
 
 
-def read_file(path, *args, **kwargs):
+def read_file(path, fast=False, *args, **kwargs):
     """Read data from a file"""
     # extract kwargs that are intended for `format_data`
     format_kwargs = dict(index=kwargs.pop("index"))
     for c in [i for i in IAMC_IDX + ["year", "time", "value"] if i in kwargs]:
         format_kwargs[c] = kwargs.pop(c)
-    return format_data(read_pandas(path, *args, **kwargs), **format_kwargs)
+    data = read_pandas(path, *args, **kwargs)
+    if fast:
+        # determine non-data columns
+        extra_cols, time_col, data_cols = intuit_column_groups(data)
+        # format columns for fast reading
+        data = data.rename(columns={c: str(c).lower() for c in extra_cols})
+        extra_cols = [str(c).lower() for c in extra_cols]
+        for c in format_kwargs["index"]:
+            extra_cols.remove(c)
+        # support databases
+        if "notes" in data.columns:
+            data = format_from_database(data)
+            extra_cols.remove("notes")
+        # force integer year columns
+        if time_col == "year":
+            data = data.rename(columns={c: int(c) for c in data_cols})
+        # support file data in long format
+        if "value" in extra_cols:
+            extra_cols.remove("value")
+        idx = IAMC_IDX + list(set(format_kwargs["index"] + extra_cols) - set(IAMC_IDX))
+        return fast_format_data(data.set_index(idx), **format_kwargs)
+    else:
+        return format_data(data, **format_kwargs)
+
+
+def intuit_column_groups(df, index=[]):
+    cols = [c for c in df.columns if c not in index + REQUIRED_COLS]
+    year_cols, time_cols, extra_cols = [], [], []
+    for i in cols:
+        # if the column name can be cast to integer, assume it's a year column
+        try:
+            int(i)
+            year_cols.append(i)
+
+        # otherwise, try casting to datetime
+        except (ValueError, TypeError):
+            try:
+                dateutil.parser.parse(str(i))
+                time_cols.append(i)
+
+            # neither year nor datetime, so it is an extra column
+            except ValueError:
+                extra_cols.append(i)
+    if year_cols and not time_cols:
+        time_col = "year"
+        melt_cols = sorted(year_cols)
+    else:
+        time_col = "time"
+        melt_cols = sorted(year_cols) + sorted(time_cols)
+    if not melt_cols:
+        raise ValueError("Missing time domain")
+    return extra_cols, time_col, melt_cols
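intuit_column_groups classifies columns by probing each name: castable to int means a year column, parseable by dateutil means a datetime column, and anything else becomes an extra column. A quick sketch of the expected result on a hypothetical wide frame (assuming pyam's REQUIRED_COLS covers region/variable/unit):

    import pandas as pd

    df = pd.DataFrame(
        columns=["model", "scenario", "region", "variable", "unit", "subannual", 2005, 2010]
    )
    extra_cols, time_col, melt_cols = intuit_column_groups(df, index=["model", "scenario"])
    # expected: extra_cols == ["subannual"], time_col == "year", melt_cols == [2005, 2010]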
s.split("-")[0].strip()) - df.loc[:, "scenario"] = scen.apply( - lambda s: "-".join(s.split("-")[1:]).strip() - ) + df = format_from_database(df) # reset the index if meaningful entries are included there if not list(df.index.names) == [None]: @@ -274,6 +369,9 @@ def convert_r_columns(c): if missing_required_col: raise ValueError(f"Missing required columns: {missing_required_col}") + # replace missing units by an empty string for user-friendly filtering + df.loc[df.unit.isnull(), "unit"] = "" + # check whether data in wide format (IAMC) or long format (`value` column) if "value" in df.columns: # check if time column is given as `year` (int) or `time` (datetime) @@ -288,6 +386,7 @@ def convert_r_columns(c): for c in df.columns if c not in index + REQUIRED_COLS + [time_col, "value"] ] + wide = False else: # if in wide format, check if columns are years (int) or datetime cols = [c for c in df.columns if c not in index + REQUIRED_COLS] @@ -316,19 +415,38 @@ def convert_r_columns(c): melt_cols = sorted(year_cols) + sorted(time_cols) if not melt_cols: raise ValueError("Missing time domain") + wide = True + + # verify that there are no nan's left (in columns), and transform data + idx = index + REQUIRED_COLS + extra_cols + null_rows = df[idx].isnull().T.any() + if null_rows.any(): + _df = df[idx] + cols = ", ".join(_df.columns[_df.isnull().any().values]) + raise_data_error( + f"Empty cells in `data` (columns: '{cols}')", _df.loc[null_rows] + ) + del null_rows - # melt the dataframe - df = pd.melt( - df, - id_vars=index + REQUIRED_COLS + extra_cols, - var_name=time_col, - value_vars=melt_cols, - value_name="value", + if wide: + df = ( + df + .set_index(idx) + [melt_cols] + .rename_axis(columns=time_col) + .stack() + ) + df.name = "value" + else: + df = ( + df + .set_index(idx + [time_col]) + ['value'] ) # cast value column to numeric and drop nan try: - df["value"] = pd.to_numeric(df["value"]) + df = pd.to_numeric(df) except ValueError as e: # get the row number where the error happened row_nr_regex = re.compile(r"(?<=at position )\d+") @@ -337,23 +455,7 @@ def convert_r_columns(c): short_error = short_error_regex.search(str(e)).group() raise_data_error(f"{short_error} in `data`", df.iloc[[row_nr]]) - df.dropna(inplace=True, subset=["value"]) - - # replace missing units by an empty string for user-friendly filtering - df.loc[df.unit.isnull(), "unit"] = "" - - # verify that there are no nan's left (in columns) - null_rows = df.isnull().T.any() - if null_rows.any(): - cols = ", ".join(df.columns[df.isnull().any().values]) - raise_data_error( - f"Empty cells in `data` (columns: '{cols}')", df.loc[null_rows] - ) - del null_rows - - # cast to pd.Series, check for duplicates - idx_cols = index + REQUIRED_COLS + [time_col] + extra_cols - df = df.set_index(idx_cols).value + df = df.dropna() # format the time-column _time = [to_time(i) for i in get_index_levels(df.index, time_col)] diff --git a/tests/test_core.py b/tests/test_core.py index aa0064cc5..62e9ade97 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -73,6 +73,12 @@ def test_init_from_iamdf(test_df_year): assert test_df_year.scenario == ["scen_b", "scen_foo"] +def test_init_fast(test_df_year): + obs = IamDataFrame(test_df_year, fast=True) + exp = IamDataFrame(test_df_year) + assert_iamframe_equal(obs, exp) + + def test_init_from_iamdf_raises(test_df_year): # casting an IamDataFrame instance again with extra args fails match = "Invalid arguments for initializing from IamDataFrame: {'model': 'foo'}" diff --git a/tests/test_io.py 
diff --git a/tests/test_core.py b/tests/test_core.py
index aa0064cc5..62e9ade97 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -73,6 +73,12 @@ def test_init_from_iamdf(test_df_year):
     assert test_df_year.scenario == ["scen_b", "scen_foo"]
 
 
+def test_init_fast(test_df_year):
+    obs = IamDataFrame(test_df_year, fast=True)
+    exp = IamDataFrame(test_df_year)
+    assert_iamframe_equal(obs, exp)
+
+
 def test_init_from_iamdf_raises(test_df_year):
     # casting an IamDataFrame instance again with extra args fails
     match = "Invalid arguments for initializing from IamDataFrame: {'model': 'foo'}"
diff --git a/tests/test_io.py b/tests/test_io.py
index 56add9217..94a384a10 100644
--- a/tests/test_io.py
+++ b/tests/test_io.py
@@ -226,14 +226,16 @@ def test_load_meta_empty(test_pd_df):
     assert_iamframe_equal(obs, exp)
 
 
-def test_load_ssp_database_downloaded_file(test_pd_df):
+@pytest.mark.parametrize("fast", [True, False])
+def test_load_ssp_database_downloaded_file(test_pd_df, fast):
     exp = IamDataFrame(test_pd_df).filter(**FILTER_ARGS).as_pandas()
     file = TEST_DATA_DIR / "test_SSP_database_raw_download.xlsx"
-    obs_df = IamDataFrame(file)
+    obs_df = IamDataFrame(file, fast=fast)
     pd.testing.assert_frame_equal(obs_df.as_pandas(), exp)
 
 
 def test_load_rcp_database_downloaded_file(test_pd_df):
+    # RCP data is not yet tested with fast=True because it requires additional processing
     exp = IamDataFrame(test_pd_df).filter(**FILTER_ARGS).as_pandas()
     file = TEST_DATA_DIR / "test_RCP_database_raw_download.xlsx"
     obs_df = IamDataFrame(file)
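Regarding the RCP caveat in the last test: in raw RCP database downloads, model and scenario arrive jammed together in a single scenario field, and format_from_database splits them on the first hyphen. A small sketch of that split (the sample string is illustrative of the RCP naming pattern, not taken from the test data):

    s = "IMAGE - RCP3-PD (2.6)"
    model = s.split("-")[0].strip()                # "IMAGE"
    scenario = "-".join(s.split("-")[1:]).strip()  # "RCP3-PD (2.6)"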