deploy test
asnyv committed Aug 13, 2020
1 parent 964717c commit 2b5079a
Showing 6 changed files with 249 additions and 40 deletions.
139 changes: 139 additions & 0 deletions tests/unit_tests/data_input/test_calc_from_cumulatives.py
@@ -0,0 +1,139 @@
import pytest
import pandas as pd
import webviz_subsurface._datainput.from_timeseries_cumulatives as from_cum

DATA_DF = pd.read_csv(
    "./webviz-subsurface-testdata/reek_history_match/share/results/tables/"
    + "unsmry--monthly.csv"
)
DATA_DF.DATE = DATA_DF.DATE.astype(str)


def test_calc_from_cumulatives():
    # Includes monthly data, 10 reals x 4 ensembles, 3 years and 1 month (2000-01-01 to 2003-02-01)

    ## Test single column key: FOPT as average rate AVG_FOPR, monthly interval from monthly input
    calc_df = from_cum.calc_from_cumulatives(
        data=DATA_DF,
        column_keys="FOPT",
        time_index="monthly",
        time_index_input="monthly",
        as_rate=True,
    )

    # Test real 0, iter-2
    real_data = DATA_DF[(DATA_DF["REAL"] == 0) & (DATA_DF["ENSEMBLE"] == "iter-2")]
    real_calc = calc_df[(calc_df["REAL"] == 0) & (calc_df["ENSEMBLE"] == "iter-2")]

    assert real_calc[real_calc.DATE == "2000-01-01"]["AVG_FOPR"].values == (
        (
            real_data[real_data.DATE == "2000-02-01"]["FOPT"].values
            - real_data[real_data.DATE == "2000-01-01"]["FOPT"].values
        )
        / 31
    )

    assert real_calc[real_calc.DATE == "2002-05-01"]["AVG_FOPR"].values == (
        (
            real_data[real_data.DATE == "2002-06-01"]["FOPT"].values
            - real_data[real_data.DATE == "2002-05-01"]["FOPT"].values
        )
        / 31
    )

    ## Test multiple column keys: WOPT:OP_1 and GOPT:OP as average rates, yearly interval from monthly input
    calc_df = from_cum.calc_from_cumulatives(
        data=DATA_DF,
        column_keys=["WOPT:OP_1", "GOPT:OP"],
        time_index="yearly",
        time_index_input="monthly",
        as_rate=True,
    )
    # Test real 4, iter-0
    real_data = DATA_DF[(DATA_DF["REAL"] == 4) & (DATA_DF["ENSEMBLE"] == "iter-0")]
    real_calc = calc_df[(calc_df["REAL"] == 4) & (calc_df["ENSEMBLE"] == "iter-0")]

    assert real_calc[real_calc.DATE == "2000-01-01"]["AVG_WOPR:OP_1"].values == (
        (
            real_data[real_data.DATE == "2001-01-01"]["WOPT:OP_1"].values
            - real_data[real_data.DATE == "2000-01-01"]["WOPT:OP_1"].values
        )
        / 366
    )

    assert real_calc[real_calc.DATE == "2002-01-01"]["AVG_GOPR:OP"].values == (
        (
            real_data[real_data.DATE == "2003-01-01"]["GOPT:OP"].values
            - real_data[real_data.DATE == "2002-01-01"]["GOPT:OP"].values
        )
        / 365
    )

    assert real_calc[real_calc.DATE == "2002-01-01"]["AVG_WOPR:OP_1"].values == (
        (
            real_data[real_data.DATE == "2003-01-01"]["WOPT:OP_1"].values
            - real_data[real_data.DATE == "2002-01-01"]["WOPT:OP_1"].values
        )
        / 365
    )

    assert real_calc[real_calc.DATE == "2001-01-01"]["AVG_GOPR:OP"].values == (
        (
            real_data[real_data.DATE == "2002-01-01"]["GOPT:OP"].values
            - real_data[real_data.DATE == "2001-01-01"]["GOPT:OP"].values
        )
        / 365
    )

    ## Test multiple column keys: WGPT:OP_2 and GWPT:OP as interval cumulatives (as_rate=False), monthly interval
    calc_df = from_cum.calc_from_cumulatives(
        data=DATA_DF,
        column_keys=["WGPT:OP_2", "GWPT:OP"],
        time_index="monthly",
        time_index_input="monthly",
        as_rate=False,
    )
    # Test real 9, iter-0
    real_data = DATA_DF[(DATA_DF["REAL"] == 9) & (DATA_DF["ENSEMBLE"] == "iter-0")]
    real_calc = calc_df[(calc_df["REAL"] == 9) & (calc_df["ENSEMBLE"] == "iter-0")]

    assert real_calc[real_calc.DATE == "2000-01-01"]["INTVL_WGPT:OP_2"].values == (
        real_data[real_data.DATE == "2000-02-01"]["WGPT:OP_2"].values
        - real_data[real_data.DATE == "2000-01-01"]["WGPT:OP_2"].values
    )

    assert real_calc[real_calc.DATE == "2002-05-01"]["INTVL_GWPT:OP"].values == (
        real_data[real_data.DATE == "2002-06-01"]["GWPT:OP"].values
        - real_data[real_data.DATE == "2002-05-01"]["GWPT:OP"].values
    )

    assert real_calc[real_calc.DATE == "2000-12-01"]["INTVL_WGPT:OP_2"].values == (
        real_data[real_data.DATE == "2001-01-01"]["WGPT:OP_2"].values
        - real_data[real_data.DATE == "2000-12-01"]["WGPT:OP_2"].values
    )

    assert real_calc[real_calc.DATE == "2002-02-01"]["INTVL_GWPT:OP"].values == (
        real_data[real_data.DATE == "2002-03-01"]["GWPT:OP"].values
        - real_data[real_data.DATE == "2002-02-01"]["GWPT:OP"].values
    )


def test_calc_from_cumulatives_errors():
    with pytest.raises(ValueError):
        # The input data is sampled monthly, so time_index_input="yearly" should raise a ValueError
        calc_df = from_cum.calc_from_cumulatives(
            data=DATA_DF,
            column_keys=["WGPT:OP_2", "GWPT:OP"],
            time_index="monthly",
            time_index_input="yearly",
            as_rate=False,
        )
    with pytest.raises(ValueError):
        # The input data is sampled monthly, so resampling to the higher frequency
        # time_index="daily" should raise a ValueError
        calc_df = from_cum.calc_from_cumulatives(
            data=DATA_DF,
            column_keys=["WGPT:OP_2", "GWPT:OP"],
            time_index="daily",
            time_index_input="monthly",
            as_rate=True,
        )
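The assertions above reduce to a simple rule: the interval value at a report date is the cumulative at the next report date minus the cumulative at the current one, and the average rate divides that delta by the number of days in the interval. A minimal pandas sketch of that arithmetic (not part of the commit), using made-up cumulative values rather than the reek_history_match test data:

import pandas as pd

cum = pd.DataFrame(
    {
        "DATE": pd.to_datetime(["2000-01-01", "2000-02-01", "2000-03-01"]),
        "FOPT": [0.0, 3100.0, 6000.0],  # hypothetical cumulative oil production
    }
)
intvl_fopt = cum["FOPT"].diff().shift(-1)    # delta over the coming interval: 3100.0, 2900.0, NaN
days = cum["DATE"].diff().shift(-1).dt.days  # days in that interval: 31.0, 29.0, NaN
avg_fopr = intvl_fopt / days                 # average rate: 100.0 for both full intervals
assert avg_fopr.iloc[0] == 100.0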
46 changes: 46 additions & 0 deletions tests/unit_tests/utils_tests/test_simulation_timeseries.py
@@ -0,0 +1,46 @@
import webviz_subsurface._utils.simulation_timeseries as simulation_timeseries


def test_date_to_interval_conversion():
    assert (
        simulation_timeseries.date_to_interval_conversion(
            date="2000-01-01", vector="AVG_FOPR", interval="monthly", as_date=False
        )
        == "2000-01"
    )
    assert (
        simulation_timeseries.date_to_interval_conversion(
            date="2003-05-12", vector="AVG_WOPR:OP_1", interval="monthly", as_date=False
        )
        == "2003-05"
    )
    assert (
        simulation_timeseries.date_to_interval_conversion(
            date="2002-05-12", vector="AVG_WOPR:OP_1", interval="yearly", as_date=False
        )
        == "2002"
    )
    assert (
        simulation_timeseries.date_to_interval_conversion(
            date="2002-05-12", vector="AVG_WOPR:OP_1", interval="yearly", as_date=True
        )
        == "2002-01-01"
    )
    assert (
        simulation_timeseries.date_to_interval_conversion(
            date="2002-05-12", vector="AVG_WOPR:OP_1", interval="daily", as_date=False
        )
        == "2002-05-12"
    )
    assert (
        simulation_timeseries.date_to_interval_conversion(
            date="2002-05-12", vector="AVG_WOPR:OP_1", interval="daily", as_date=True
        )
        == "2002-05-12"
    )
    assert (
        simulation_timeseries.date_to_interval_conversion(
            date=None, vector="AVG_WOPR:OP_1", interval="daily", as_date=True
        )
        is None
    )
15 changes: 5 additions & 10 deletions webviz_subsurface/_abbreviations/reservoir_simulation.py
@@ -62,9 +62,8 @@ def simulation_vector_description(vector: str) -> str:
         and SIMULATION_VECTOR_TERMINOLOGY[vector_base_name]["type"] == "region"
     ):
         return (
-            prefix
-            + f"{SIMULATION_VECTOR_TERMINOLOGY[vector_base_name]['description']}"
-            + f", region {fip} {node}"
+            f"{prefix}{SIMULATION_VECTOR_TERMINOLOGY[vector_base_name]['description']}"
+            f", region {fip} {node}"
         )
     elif vector_name.startswith("W") and vector_name[4] == "L":
         # These are completion vectors, e.g. WWCTL:__1:OP_1 and WOPRL_10:OP_1 for
@@ -77,19 +76,15 @@ def simulation_vector_description(vector: str) -> str:
             == "completion"
         ):
             return (
-                prefix
-                + f"{SIMULATION_VECTOR_TERMINOLOGY[vector_base_name]['description']}"
-                + f", well {node} completion {comp}"
+                f"{prefix}{SIMULATION_VECTOR_TERMINOLOGY[vector_base_name]['description']}"
+                f", well {node} completion {comp}"
             )
 
     if vector_name in SIMULATION_VECTOR_TERMINOLOGY:
         metadata = SIMULATION_VECTOR_TERMINOLOGY[vector_name]
         if node is None:
             return prefix + metadata["description"]
-        return (
-            prefix
-            + f"{metadata['description']}, {metadata['type'].replace('_', ' ')} {node}"
-        )
+        return f"{prefix}{metadata['description']}, {metadata['type'].replace('_', ' ')} {node}"
 
     if not vector.startswith(
         ("AU", "BU", "CU", "FU", "GU", "RU", "SU", "WU", "Recovery Factor of")
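The refactor in this file only swaps explicit "+" concatenation for implicit concatenation of adjacent (f-)string literals inside the parentheses; the rendered descriptions are unchanged. A small standalone sketch of the equivalence, with illustrative values in place of the terminology lookup:

prefix = "Average "
description = "Oil Production Rate"
node = "OP_1"
old_style = prefix + f"{description}" + f", well {node}"
new_style = (
    f"{prefix}{description}"
    f", well {node}"
)
# Adjacent string literals are joined at compile time, so no "+" is needed.
assert old_style == new_style == "Average Oil Production Rate, well OP_1"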
39 changes: 28 additions & 11 deletions webviz_subsurface/_datainput/from_timeseries_cumulatives.py
@@ -9,7 +9,7 @@ def calc_from_cumulatives(
     column_keys: Union[List[str], str],
     time_index: str,
     time_index_input: str,
-    as_rate: Dict[str, bool] = {},
+    as_rate: bool,
 ) -> pd.DataFrame:
     """Calculates interval delta and average rate at given time interval `time_index`.
     Assumes that the data is already sampled to a time interval `time_index_input`.
@@ -34,7 +34,7 @@
 
     # Converting the DATE axis to datetime to allow for timedeltas
     data.loc[:, ["DATE"]] = pd.to_datetime(data["DATE"])
-    _verify_time_index_input(data, time_index_input)
+    _verify_time_index(data, time_index, time_index_input)
     # Creating a column of unique values per ensemble-realization combination. A non-zero
     # diff of this column will then mean that it is a diff between different realizations.
     # Could alternatively loop over ensembles and realizations, but this is quicker for
@@ -54,9 +54,7 @@
 
     data.reset_index(level=["ENSEMBLE", "REAL"], inplace=True)
 
-    calc_cols = {
-        vec: rename_vec_from_cum(vec, as_rate.get(vec, False)) for vec in column_keys
-    }
+    calc_cols = {vec: rename_vec_from_cum(vec, as_rate) for vec in column_keys}
     listed_calc_cols = [calc_cols[col] for col in column_keys]
 
     # Take diff of given column_keys + the ensemble-realization identifier.
@@ -80,11 +78,10 @@
     if as_rate:
         days = diff_cum["DATE"].diff().shift(-1).dt.days.fillna(value=0)
         for vec in column_keys:
-            if as_rate.get(vec, False):
-                with np.errstate(invalid="ignore"):
-                    diff_cum.loc[:, calc_cols[vec]] = (
-                        diff_cum[calc_cols[vec]].values / days.values
-                    )
+            with np.errstate(invalid="ignore"):
+                diff_cum.loc[:, calc_cols[vec]] = (
+                    diff_cum[calc_cols[vec]].values / days.values
+                )
 
     # Set last value of each real to 0 (as we don't loop over the realizations)
     diff_cum.loc[diff_cum["ensrealuid"] != 0, listed_calc_cols] = 0
@@ -93,15 +90,26 @@
     return diff_cum
 
 
-def _verify_time_index_input(df: pd.DataFrame, time_index_input: str):
+def _verify_time_index(df: pd.DataFrame, time_index: str, time_index_input: str):
     freqs = {"D": "daily", "MS": "monthly", "YS": "yearly"}
+    valid_time_indices = {
+        "daily": ["daily", "monthly", "yearly"],
+        "monthly": ["monthly", "yearly"],
+        "yearly": ["yearly"],
+    }
     inferred_frequency = pd.infer_freq(sorted(df["DATE"].unique()))
     if not freqs.get(inferred_frequency) == time_index_input:
         raise ValueError(
             "The DataFrame most likely contains data points which are not sampled at the "
             f"frequency time_index_input={time_index_input}. The inferred frequency from the "
             f"unique DATE values was {inferred_frequency}."
         )
+    if time_index not in valid_time_indices[time_index_input]:
+        raise ValueError(
+            f"The time_index {time_index} has a higher frequency than time_index_input "
+            f"{time_index_input}. Valid time_index options are "
+            f"{valid_time_indices[time_index_input]}."
+        )
 
 
 def _resample_time_index(
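_verify_time_index relies on pandas frequency inference to validate the input sampling. A standalone sketch (not part of the commit) of what pd.infer_freq returns for the samplings mapped in the freqs dictionary above:

import pandas as pd

monthly_dates = pd.DatetimeIndex(["2000-01-01", "2000-02-01", "2000-03-01"])
daily_dates = pd.date_range("2000-01-01", periods=5, freq="D")
print(pd.infer_freq(monthly_dates))  # "MS" (month start), accepted for time_index_input="monthly"
print(pd.infer_freq(daily_dates))    # "D", accepted for time_index_input="daily"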
@@ -133,6 +141,15 @@ def _resample_time_index(
 
 
 def rename_vec_from_cum(vector: str, as_rate: bool):
+    """This function assumes the vector is a cumulative/total vector named according to the
+    Eclipse standard, and it is fairly naive when converting to a rate name. Based on the list in libecl
+    https://github.com/equinor/libecl/blob/69f1ee0ddf696c87b6d85eca37eed7e8b66ac2db/\
+lib/ecl/smspec_node.cpp#L531-L586
+    the T identifying total/cumulative should not occur before the 4th letter, as all the listed
+    strings are prefixed with one or two letters in the vectors. The replacement therefore starts
+    at position 3 (the 4th letter) to reduce the risk of errors in the conversion to rate naming,
+    but it is hard to be completely safe.
+    """
     return (
         f"AVG_{vector[0:3] + vector[3:].replace('T', 'R', 1)}"
         if as_rate
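The diff is truncated here, but together with the new unit test the naming convention is: cumulative vectors get an AVG_ prefix with the first T after the third character replaced by R when converted to rates, and an INTVL_ prefix when kept as interval cumulatives. A standalone sketch mirroring that convention (the INTVL_ branch is inferred from the test's column names, not shown in this hunk):

def rename_sketch(vector: str, as_rate: bool) -> str:
    # Mirrors the slice-and-replace above: keep the first three characters untouched,
    # then replace the first "T" (total) with "R" (rate) when converting to a rate name.
    if as_rate:
        return f"AVG_{vector[0:3] + vector[3:].replace('T', 'R', 1)}"
    return f"INTVL_{vector}"  # interval naming as asserted in the new unit test

assert rename_sketch("FOPT", as_rate=True) == "AVG_FOPR"
assert rename_sketch("WOPT:OP_1", as_rate=True) == "AVG_WOPR:OP_1"
assert rename_sketch("WGPT:OP_2", as_rate=False) == "INTVL_WGPT:OP_2"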
24 changes: 24 additions & 0 deletions webviz_subsurface/_utils/simulation_timeseries.py
@@ -167,3 +167,27 @@ def render_hovertemplate(vector: str, interval: Optional[str]):
             return "(%{x|%Y}, %{y})<br>"
         raise ValueError(f"Interval {interval} is not supported.")
     return "(%{x}, %{y})<br>"  # Plotly's default behavior
+
+
+def date_to_interval_conversion(
+    date: Optional[str], vector: str, interval: str, as_date: bool = False
+) -> Optional[str]:
+    """Converts a date on the form YYYY-MM-DD to an interval 'date' if the
+    vector is calculated from cumulatives (prefixed with AVG_ or INTVL_).
+    If as_date=True, the date returned is the first date of the interval
+    (independent of which date in the interval the input is);
+    if not, the 'date' returned is the common basis for the interval,
+    e.g. YYYY-MM-DD for daily, YYYY-MM for monthly and YYYY for yearly.
+    The input date must be a date that can be reduced to the interval date by simply
+    removing terms, e.g. both 2001-05-16 and 2001-05-01 are valid for monthly and will
+    return 2001-05.
+    """
+    if date is None:
+        return date
+    if vector.startswith(("AVG_", "INTVL_")):
+        if interval == "monthly":
+            date = "-".join(date.split("-")[0:2]) + ("-01" if as_date else "")
+        if interval == "yearly":
+            date = date.split("-")[0] + ("-01-01" if as_date else "")
+    return date