deploy test
asnyv committed Aug 13, 2020
1 parent 964717c commit 2b5079a
Showing 6 changed files with 249 additions and 40 deletions.
139 changes: 139 additions & 0 deletions tests/unit_tests/data_input/test_calc_from_cumulatives.py
@@ -0,0 +1,139 @@
import pytest
import pandas as pd
import webviz_subsurface._datainput.from_timeseries_cumulatives as from_cum

DATA_DF = pd.read_csv(
    "./webviz-subsurface-testdata/reek_history_match/share/results/tables/"
    + "unsmry--monthly.csv"
)
DATA_DF.DATE = DATA_DF.DATE.astype(str)


def test_calc_from_cumulatives():
    # Includes monthly data, 10 reals x 4 ensembles, 3 years and 1 month (2000-01-01 to 2003-02-01)

    ## Test single column key: FOPT as average rate AVG_FOPR, monthly interval from monthly input
    calc_df = from_cum.calc_from_cumulatives(
        data=DATA_DF,
        column_keys="FOPT",
        time_index="monthly",
        time_index_input="monthly",
        as_rate=True,
    )

    # Test real 0, iter-2
    real_data = DATA_DF[(DATA_DF["REAL"] == 0) & (DATA_DF["ENSEMBLE"] == "iter-2")]
    real_calc = calc_df[(calc_df["REAL"] == 0) & (calc_df["ENSEMBLE"] == "iter-2")]

    assert real_calc[real_calc.DATE == "2000-01-01"]["AVG_FOPR"].values == (
        (
            real_data[real_data.DATE == "2000-02-01"]["FOPT"].values
            - real_data[real_data.DATE == "2000-01-01"]["FOPT"].values
        )
        / 31
    )

    assert real_calc[real_calc.DATE == "2002-05-01"]["AVG_FOPR"].values == (
        (
            real_data[real_data.DATE == "2002-06-01"]["FOPT"].values
            - real_data[real_data.DATE == "2002-05-01"]["FOPT"].values
        )
        / 31
    )

    ## Test multiple column keys: WOPT:OP_1 and GOPT:OP as average rates, yearly interval from monthly input
    calc_df = from_cum.calc_from_cumulatives(
        data=DATA_DF,
        column_keys=["WOPT:OP_1", "GOPT:OP"],
        time_index="yearly",
        time_index_input="monthly",
        as_rate=True,
    )
    # Test real 4, iter-0
    real_data = DATA_DF[(DATA_DF["REAL"] == 4) & (DATA_DF["ENSEMBLE"] == "iter-0")]
    real_calc = calc_df[(calc_df["REAL"] == 4) & (calc_df["ENSEMBLE"] == "iter-0")]

    assert real_calc[real_calc.DATE == "2000-01-01"]["AVG_WOPR:OP_1"].values == (
        (
            real_data[real_data.DATE == "2001-01-01"]["WOPT:OP_1"].values
            - real_data[real_data.DATE == "2000-01-01"]["WOPT:OP_1"].values
        )
        / 366
    )

    assert real_calc[real_calc.DATE == "2002-01-01"]["AVG_GOPR:OP"].values == (
        (
            real_data[real_data.DATE == "2003-01-01"]["GOPT:OP"].values
            - real_data[real_data.DATE == "2002-01-01"]["GOPT:OP"].values
        )
        / 365
    )

    assert real_calc[real_calc.DATE == "2002-01-01"]["AVG_WOPR:OP_1"].values == (
        (
            real_data[real_data.DATE == "2003-01-01"]["WOPT:OP_1"].values
            - real_data[real_data.DATE == "2002-01-01"]["WOPT:OP_1"].values
        )
        / 365
    )

    assert real_calc[real_calc.DATE == "2001-01-01"]["AVG_GOPR:OP"].values == (
        (
            real_data[real_data.DATE == "2002-01-01"]["GOPT:OP"].values
            - real_data[real_data.DATE == "2001-01-01"]["GOPT:OP"].values
        )
        / 365
    )

    ## Test multiple column keys: WGPT:OP_2 and GWPT:OP as interval cumulatives (as_rate=False), monthly interval
    calc_df = from_cum.calc_from_cumulatives(
        data=DATA_DF,
        column_keys=["WGPT:OP_2", "GWPT:OP"],
        time_index="monthly",
        time_index_input="monthly",
        as_rate=False,
    )
    # Test real 9, iter-0
    real_data = DATA_DF[(DATA_DF["REAL"] == 9) & (DATA_DF["ENSEMBLE"] == "iter-0")]
    real_calc = calc_df[(calc_df["REAL"] == 9) & (calc_df["ENSEMBLE"] == "iter-0")]

    assert real_calc[real_calc.DATE == "2000-01-01"]["INTVL_WGPT:OP_2"].values == (
        real_data[real_data.DATE == "2000-02-01"]["WGPT:OP_2"].values
        - real_data[real_data.DATE == "2000-01-01"]["WGPT:OP_2"].values
    )

    assert real_calc[real_calc.DATE == "2002-05-01"]["INTVL_GWPT:OP"].values == (
        real_data[real_data.DATE == "2002-06-01"]["GWPT:OP"].values
        - real_data[real_data.DATE == "2002-05-01"]["GWPT:OP"].values
    )

    assert real_calc[real_calc.DATE == "2000-12-01"]["INTVL_WGPT:OP_2"].values == (
        real_data[real_data.DATE == "2001-01-01"]["WGPT:OP_2"].values
        - real_data[real_data.DATE == "2000-12-01"]["WGPT:OP_2"].values
    )

    assert real_calc[real_calc.DATE == "2002-02-01"]["INTVL_GWPT:OP"].values == (
        real_data[real_data.DATE == "2002-03-01"]["GWPT:OP"].values
        - real_data[real_data.DATE == "2002-02-01"]["GWPT:OP"].values
    )


def test_calc_from_cumulatives_errors():
    with pytest.raises(ValueError):
        # The input data is sampled monthly, so time_index_input="yearly" should raise a ValueError
        calc_df = from_cum.calc_from_cumulatives(
            data=DATA_DF,
            column_keys=["WGPT:OP_2", "GWPT:OP"],
            time_index="monthly",
            time_index_input="yearly",
            as_rate=False,
        )
    with pytest.raises(ValueError):
        # The input data is sampled monthly, so resampling to the higher frequency
        # time_index="daily" should raise a ValueError
        calc_df = from_cum.calc_from_cumulatives(
            data=DATA_DF,
            column_keys=["WGPT:OP_2", "GWPT:OP"],
            time_index="daily",
            time_index_input="monthly",
            as_rate=True,
        )
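The assertions above reduce to a simple rule: the interval value at a report date is the cumulative at the next report date minus the cumulative at the current one, and the average rate divides that delta by the number of days in the interval. A minimal pandas sketch of that arithmetic (not part of the commit), using made-up cumulative values rather than the reek_history_match test data:

import pandas as pd

cum = pd.DataFrame(
    {
        "DATE": pd.to_datetime(["2000-01-01", "2000-02-01", "2000-03-01"]),
        "FOPT": [0.0, 3100.0, 6000.0],  # hypothetical cumulative oil production
    }
)
intvl_fopt = cum["FOPT"].diff().shift(-1)    # delta over the coming interval: 3100.0, 2900.0, NaN
days = cum["DATE"].diff().shift(-1).dt.days  # days in that interval: 31.0, 29.0, NaN
avg_fopr = intvl_fopt / days                 # average rate: 100.0 for both full intervals
assert avg_fopr.iloc[0] == 100.0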
46 changes: 46 additions & 0 deletions tests/unit_tests/utils_tests/test_simulation_timeseries.py
@@ -0,0 +1,46 @@
import webviz_subsurface._utils.simulation_timeseries as simulation_timeseries


def test_date_to_interval_conversion():
    assert (
        simulation_timeseries.date_to_interval_conversion(
            date="2000-01-01", vector="AVG_FOPR", interval="monthly", as_date=False
        )
        == "2000-01"
    )
    assert (
        simulation_timeseries.date_to_interval_conversion(
            date="2003-05-12", vector="AVG_WOPR:OP_1", interval="monthly", as_date=False
        )
        == "2003-05"
    )
    assert (
        simulation_timeseries.date_to_interval_conversion(
            date="2002-05-12", vector="AVG_WOPR:OP_1", interval="yearly", as_date=False
        )
        == "2002"
    )
    assert (
        simulation_timeseries.date_to_interval_conversion(
            date="2002-05-12", vector="AVG_WOPR:OP_1", interval="yearly", as_date=True
        )
        == "2002-01-01"
    )
    assert (
        simulation_timeseries.date_to_interval_conversion(
            date="2002-05-12", vector="AVG_WOPR:OP_1", interval="daily", as_date=False
        )
        == "2002-05-12"
    )
    assert (
        simulation_timeseries.date_to_interval_conversion(
            date="2002-05-12", vector="AVG_WOPR:OP_1", interval="daily", as_date=True
        )
        == "2002-05-12"
    )
    assert (
        simulation_timeseries.date_to_interval_conversion(
            date=None, vector="AVG_WOPR:OP_1", interval="daily", as_date=True
        )
        is None
    )
15 changes: 5 additions & 10 deletions webviz_subsurface/_abbreviations/reservoir_simulation.py
@@ -62,9 +62,8 @@ def simulation_vector_description(vector: str) -> str:
         and SIMULATION_VECTOR_TERMINOLOGY[vector_base_name]["type"] == "region"
     ):
         return (
-            prefix
-            + f"{SIMULATION_VECTOR_TERMINOLOGY[vector_base_name]['description']}"
-            + f", region {fip} {node}"
+            f"{prefix}{SIMULATION_VECTOR_TERMINOLOGY[vector_base_name]['description']}"
+            f", region {fip} {node}"
         )
     elif vector_name.startswith("W") and vector_name[4] == "L":
         # These are completion vectors, e.g. WWCTL:__1:OP_1 and WOPRL_10:OP_1 for
@@ -77,19 +76,15 @@ def simulation_vector_description(vector: str) -> str:
             == "completion"
         ):
             return (
-                prefix
-                + f"{SIMULATION_VECTOR_TERMINOLOGY[vector_base_name]['description']}"
-                + f", well {node} completion {comp}"
+                f"{prefix}{SIMULATION_VECTOR_TERMINOLOGY[vector_base_name]['description']}"
+                f", well {node} completion {comp}"
             )
 
     if vector_name in SIMULATION_VECTOR_TERMINOLOGY:
         metadata = SIMULATION_VECTOR_TERMINOLOGY[vector_name]
         if node is None:
             return prefix + metadata["description"]
-        return (
-            prefix
-            + f"{metadata['description']}, {metadata['type'].replace('_', ' ')} {node}"
-        )
+        return f"{prefix}{metadata['description']}, {metadata['type'].replace('_', ' ')} {node}"
 
     if not vector.startswith(
         ("AU", "BU", "CU", "FU", "GU", "RU", "SU", "WU", "Recovery Factor of")
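The refactor in this file only swaps explicit "+" concatenation for implicit concatenation of adjacent (f-)string literals inside the parentheses; the rendered descriptions are unchanged. A small standalone sketch of the equivalence, with illustrative values in place of the terminology lookup:

prefix = "Average "
description = "Oil Production Rate"
node = "OP_1"
old_style = prefix + f"{description}" + f", well {node}"
new_style = (
    f"{prefix}{description}"
    f", well {node}"
)
# Adjacent string literals are joined at compile time, so no "+" is needed.
assert old_style == new_style == "Average Oil Production Rate, well OP_1"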
39 changes: 28 additions & 11 deletions webviz_subsurface/_datainput/from_timeseries_cumulatives.py
@@ -9,7 +9,7 @@ def calc_from_cumulatives(
     column_keys: Union[List[str], str],
     time_index: str,
     time_index_input: str,
-    as_rate: Dict[str, bool] = {},
+    as_rate: bool,
 ) -> pd.DataFrame:
     """Calculates interval delta and average rate at given time interval `time_index`.
     Assumes that the data is already sampled to a time interval `time_index_input`.
@@ -34,7 +34,7 @@
 
     # Converting the DATE axis to datetime to allow for timedeltas
     data.loc[:, ["DATE"]] = pd.to_datetime(data["DATE"])
-    _verify_time_index_input(data, time_index_input)
+    _verify_time_index(data, time_index, time_index_input)
     # Creating a column of unique values per ensemble-realization combination. A non-zero
     # diff of this column will then mean that it is a diff between different realizations.
     # Could alternatively loop over ensembles and realizations, but this is quicker for
@@ -54,9 +54,7 @@
 
     data.reset_index(level=["ENSEMBLE", "REAL"], inplace=True)
 
-    calc_cols = {
-        vec: rename_vec_from_cum(vec, as_rate.get(vec, False)) for vec in column_keys
-    }
+    calc_cols = {vec: rename_vec_from_cum(vec, as_rate) for vec in column_keys}
     listed_calc_cols = [calc_cols[col] for col in column_keys]
 
     # Take diff of given column_keys + the ensemble-realization identifier.
@@ -80,11 +78,10 @@
     if as_rate:
         days = diff_cum["DATE"].diff().shift(-1).dt.days.fillna(value=0)
         for vec in column_keys:
-            if as_rate.get(vec, False):
-                with np.errstate(invalid="ignore"):
-                    diff_cum.loc[:, calc_cols[vec]] = (
-                        diff_cum[calc_cols[vec]].values / days.values
-                    )
+            with np.errstate(invalid="ignore"):
+                diff_cum.loc[:, calc_cols[vec]] = (
+                    diff_cum[calc_cols[vec]].values / days.values
+                )
 
     # Set last value of each real to 0 (as we don't loop over the realizations)
     diff_cum.loc[diff_cum["ensrealuid"] != 0, listed_calc_cols] = 0
@@ -93,15 +90,26 @@
     return diff_cum
 
 
-def _verify_time_index_input(df: pd.DataFrame, time_index_input: str):
+def _verify_time_index(df: pd.DataFrame, time_index: str, time_index_input: str):
     freqs = {"D": "daily", "MS": "monthly", "YS": "yearly"}
+    valid_time_indices = {
+        "daily": ["daily", "monthly", "yearly"],
+        "monthly": ["monthly", "yearly"],
+        "yearly": ["yearly"],
+    }
     inferred_frequency = pd.infer_freq(sorted(df["DATE"].unique()))
     if not freqs.get(inferred_frequency) == time_index_input:
         raise ValueError(
             "The DataFrame most likely contains data points which are not sampled at the "
             f"frequency time_index_input={time_index_input}. The inferred frequency from the "
             f"unique DATE values was {inferred_frequency}."
         )
+    if time_index not in valid_time_indices[time_index_input]:
+        raise ValueError(
+            f"The time_index {time_index} has a higher frequency than time_index_input "
+            f"{time_index_input}. Valid time_index options are "
+            f"{valid_time_indices[time_index_input]}."
+        )
 
 
 def _resample_time_index(
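_verify_time_index relies on pandas frequency inference to validate the input sampling. A standalone sketch (not part of the commit) of what pd.infer_freq returns for the samplings mapped in the freqs dictionary above:

import pandas as pd

monthly_dates = pd.DatetimeIndex(["2000-01-01", "2000-02-01", "2000-03-01"])
daily_dates = pd.date_range("2000-01-01", periods=5, freq="D")
print(pd.infer_freq(monthly_dates))  # "MS" (month start), accepted for time_index_input="monthly"
print(pd.infer_freq(daily_dates))    # "D", accepted for time_index_input="daily"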
@@ -133,6 +141,15 @@ def _resample_time_index(
 
 
 def rename_vec_from_cum(vector: str, as_rate: bool):
+    """This function assumes the vector is a cumulative/total vector named according to the
+    Eclipse standard, and it is fairly naive when converting to a rate name. Based on the list in libecl
+    https://github.com/equinor/libecl/blob/69f1ee0ddf696c87b6d85eca37eed7e8b66ac2db/\
+lib/ecl/smspec_node.cpp#L531-L586
+    the T identifying total/cumulative should not occur before the 4th letter, as all the listed
+    strings are prefixed with one or two letters in the vectors. The replacement therefore starts
+    at position 3 (the 4th letter) to reduce the risk of errors in the conversion to rate naming,
+    but it is hard to be completely safe.
+    """
     return (
         f"AVG_{vector[0:3] + vector[3:].replace('T', 'R', 1)}"
         if as_rate
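The diff is truncated here, but together with the new unit test the naming convention is: cumulative vectors get an AVG_ prefix with the first T after the third character replaced by R when converted to rates, and an INTVL_ prefix when kept as interval cumulatives. A standalone sketch mirroring that convention (the INTVL_ branch is inferred from the test's column names, not shown in this hunk):

def rename_sketch(vector: str, as_rate: bool) -> str:
    # Mirrors the slice-and-replace above: keep the first three characters untouched,
    # then replace the first "T" (total) with "R" (rate) when converting to a rate name.
    if as_rate:
        return f"AVG_{vector[0:3] + vector[3:].replace('T', 'R', 1)}"
    return f"INTVL_{vector}"  # interval naming as asserted in the new unit test

assert rename_sketch("FOPT", as_rate=True) == "AVG_FOPR"
assert rename_sketch("WOPT:OP_1", as_rate=True) == "AVG_WOPR:OP_1"
assert rename_sketch("WGPT:OP_2", as_rate=False) == "INTVL_WGPT:OP_2"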
24 changes: 24 additions & 0 deletions webviz_subsurface/_utils/simulation_timeseries.py
@@ -167,3 +167,27 @@ def render_hovertemplate(vector: str, interval: Optional[str]):
             return "(%{x|%Y}, %{y})<br>"
         raise ValueError(f"Interval {interval} is not supported.")
     return "(%{x}, %{y})<br>"  # Plotly's default behavior
+
+
+def date_to_interval_conversion(
+    date: Optional[str], vector: str, interval: str, as_date: bool = False
+) -> Optional[str]:
+    """Converts a date on the form YYYY-MM-DD to an interval 'date' if the
+    vector is calculated from cumulatives (prefixed with AVG_ or INTVL_).
+    If as_date=True, the date returned is the first date of the interval
+    (independent of which date in the interval the input is);
+    if not, the 'date' returned is the common basis for the interval,
+    e.g. YYYY-MM-DD for daily, YYYY-MM for monthly and YYYY for yearly.
+    The input date must be a date that can be reduced to the interval date by simply
+    removing terms, e.g. both 2001-05-16 and 2001-05-01 are valid for monthly and will
+    return 2001-05.
+    """
+    if date is None:
+        return date
+    if vector.startswith(("AVG_", "INTVL_")):
+        if interval == "monthly":
+            date = "-".join(date.split("-")[0:2]) + ("-01" if as_date else "")
+        if interval == "yearly":
+            date = date.split("-")[0] + ("-01-01" if as_date else "")
+    return date