From d24b52748260518e6237ee3fb370a77be25aae56 Mon Sep 17 00:00:00 2001 From: sam Date: Thu, 13 Jun 2024 23:08:47 -0400 Subject: [PATCH 01/22] Add initial transformation framework --- src/pudl/transform/ferc714.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/pudl/transform/ferc714.py b/src/pudl/transform/ferc714.py index 8ecf47179e..90bc621ae1 100644 --- a/src/pudl/transform/ferc714.py +++ b/src/pudl/transform/ferc714.py @@ -533,3 +533,36 @@ def out_ferc714__hourly_planning_area_demand( df[columns], table_name="out_ferc714__hourly_planning_area_demand" ) return df + +@asset( + io_manager_key="parquet_io_manager", + op_tags={"memory-use": "high"}, # Should this be high? + compute_kind="pandas", +) +def out_ferc714__yearly_planning_area_forecast_demand( # What is a planning area? + raw_ferc714__yearly_planning_area_forecast_demand: pd.DataFrame, +) -> pd.DataFrame: + """Transform the yearly planning area forecast data per Planning Area. + + Transformations include: + + - Drop unnecessary columns. + - TBD + + Args: + raw_ferc714__yearly_planning_area_forecast_demand: Raw table containing, + for each year and each entity, the forecasted summer and winter peak demand, + in megawatts, and annual net energy for load, in megawatthours, for the next + ten years. + + Returns: + Clean(er) version of the yearly forecasted demand by Planning Area. + """ + df = _pre_process( + raw_ferc714__yearly_planning_area_forecast_demand, + table_name="out_ferc714__yearly_planning_area_forecast_demand", + ) + + # TBD + + return df From 4e9b9b22a9d5c4641bd2ab82fc3d31c3f237a338 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 14 Jun 2024 03:18:47 +0000 Subject: [PATCH 02/22] [pre-commit.ci] auto fixes from pre-commit.com hooks For more information, see https://pre-commit.ci --- src/pudl/transform/ferc714.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/pudl/transform/ferc714.py b/src/pudl/transform/ferc714.py index 90bc621ae1..1e2a34ea26 100644 --- a/src/pudl/transform/ferc714.py +++ b/src/pudl/transform/ferc714.py @@ -534,12 +534,13 @@ def out_ferc714__hourly_planning_area_demand( ) return df + @asset( io_manager_key="parquet_io_manager", - op_tags={"memory-use": "high"}, # Should this be high? + op_tags={"memory-use": "high"}, # Should this be high? compute_kind="pandas", ) -def out_ferc714__yearly_planning_area_forecast_demand( # What is a planning area? +def out_ferc714__yearly_planning_area_forecast_demand( # What is a planning area? raw_ferc714__yearly_planning_area_forecast_demand: pd.DataFrame, ) -> pd.DataFrame: """Transform the yearly planning area forecast data per Planning Area. From 1e1a420b2afdb572bda9bad9c5b0779a1ae35eb1 Mon Sep 17 00:00:00 2001 From: sam Date: Thu, 13 Jun 2024 23:19:41 -0400 Subject: [PATCH 03/22] Add columns to be renamed --- src/pudl/transform/ferc714.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pudl/transform/ferc714.py b/src/pudl/transform/ferc714.py index 90bc621ae1..dc75a35411 100644 --- a/src/pudl/transform/ferc714.py +++ b/src/pudl/transform/ferc714.py @@ -284,6 +284,10 @@ "report_yr": "report_year", "respondent_id": "respondent_id_ferc714", }, + "out_ferc714__yearly_planning_area_forecast_demand": { + "report_yr": "report_year", + "respondent_id": "respondent_id_ferc714", + }, } From e9319a8ad66cadab8b22b049a478259aed1edc27 Mon Sep 17 00:00:00 2001 From: sam Date: Tue, 18 Jun 2024 23:07:07 -0400 Subject: [PATCH 04/22] Cleanup transform script and add schema changes --- src/pudl/metadata/fields.py | 9 +++++++ src/pudl/metadata/resources/ferc714.py | 21 ++++++++++++++++ src/pudl/transform/ferc714.py | 33 +++++++++++++++++++++----- 3 files changed, 57 insertions(+), 6 deletions(-) diff --git a/src/pudl/metadata/fields.py b/src/pudl/metadata/fields.py index 5073dd8242..509b347c7d 100644 --- a/src/pudl/metadata/fields.py +++ b/src/pudl/metadata/fields.py @@ -1553,6 +1553,10 @@ "type": "boolean", "description": "Indicates whether the boiler is capable of re-injecting fly ash.", }, + "forecast_year": { + "type": "integer", + "description": "Four-digit year that applies to a particular forecasted value.", + }, "fraction_owned": { "type": "number", "description": "Proportion of generator ownership attributable to this utility.", @@ -2425,6 +2429,11 @@ ), "unit": "MW", }, + "net_demand_mwh": { + "type": "number", + "description": "Net electricity demand for the specified period in megawatt-hours (MWh).", + "unit": "MWh", + }, "net_generation_adjusted_mwh": { "type": "number", "description": "Reported net generation adjusted by EIA to reflect non-physical commercial transfers through pseudo-ties and dynamic scheduling.", diff --git a/src/pudl/metadata/resources/ferc714.py b/src/pudl/metadata/resources/ferc714.py index 1e144e5903..47b4ff9241 100644 --- a/src/pudl/metadata/resources/ferc714.py +++ b/src/pudl/metadata/resources/ferc714.py @@ -97,6 +97,27 @@ "field_namespace": "ferc714", "etl_group": "outputs", }, + "out_ferc714__yearly_planning_area_forecast_demand": { + "description": ( + "10-year forecasted summer and winter peak demand and annual net energy per planning area. FERC Form 714, Part III, " + "Schedule 2b." + ), + "schema": { + "fields": [ + "respondent_id_ferc714", + "report_year", + "forecast_year", + "summer_peak_demand_mw", + "winter_peak_demand_mw", + "net_demand_mwh", + ], + "primary_key": ["respondent_id_ferc714", "report_year", "forecast_year"], + }, + "sources": ["ferc714"], + "field_namespace": "ferc714", + "etl_group": "ferc714", + "create_database_schema": False, + }, } """FERC Form 714 resource attributes by PUDL identifier (``resource.name``). diff --git a/src/pudl/transform/ferc714.py b/src/pudl/transform/ferc714.py index c6d97b8da8..9d419537e7 100644 --- a/src/pudl/transform/ferc714.py +++ b/src/pudl/transform/ferc714.py @@ -285,8 +285,12 @@ "respondent_id": "respondent_id_ferc714", }, "out_ferc714__yearly_planning_area_forecast_demand": { - "report_yr": "report_year", "respondent_id": "respondent_id_ferc714", + "report_yr": "report_year", + "plan_year": "forecast_year", + "summer_forecast": "summer_peak_demand_mw", + "winter_forecast": "winter_peak_demand_mw", + "net_energy_forecast": "net_demand_mwh", }, } @@ -544,30 +548,47 @@ def out_ferc714__hourly_planning_area_demand( op_tags={"memory-use": "high"}, # Should this be high? compute_kind="pandas", ) -def out_ferc714__yearly_planning_area_forecast_demand( # What is a planning area? +def out_ferc714__yearly_planning_area_forecast_demand( raw_ferc714__yearly_planning_area_forecast_demand: pd.DataFrame, ) -> pd.DataFrame: """Transform the yearly planning area forecast data per Planning Area. Transformations include: - - Drop unnecessary columns. - - TBD + - Drop/rename columns. Args: raw_ferc714__yearly_planning_area_forecast_demand: Raw table containing, - for each year and each entity, the forecasted summer and winter peak demand, + for each year and each planning area, the forecasted summer and winter peak demand, in megawatts, and annual net energy for load, in megawatthours, for the next ten years. Returns: Clean(er) version of the yearly forecasted demand by Planning Area. """ + # Clean up columns df = _pre_process( raw_ferc714__yearly_planning_area_forecast_demand, table_name="out_ferc714__yearly_planning_area_forecast_demand", ) - # TBD + # Check all data types and columns to ensure consistency with defined schema + df = _post_process( + df, table_name="out_ferc714__yearly_planning_area_forecast_demand" + ) return df + +# EVERYTHING BELOW WILL COME OUT - JUST FOR LOCAL DEV +# Get the value of DAGSTER_HOME from environment variables +import os +dagster_home = os.getenv('DAGSTER_HOME') + +# Define the file name +file_name = "storage/raw_ferc714__yearly_planning_area_forecast_demand" + +# Construct the full file path +file_path = os.path.join(dagster_home, file_name) +# Load the pickle file into a DataFrame +df = pd.read_pickle(file_path) +out_ferc714__yearly_planning_area_forecast_demand(df) \ No newline at end of file From 967f199a95c8da3acc25549fe2ea812c7093827f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 19 Jun 2024 03:08:22 +0000 Subject: [PATCH 05/22] [pre-commit.ci] auto fixes from pre-commit.com hooks For more information, see https://pre-commit.ci --- src/pudl/transform/ferc714.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/pudl/transform/ferc714.py b/src/pudl/transform/ferc714.py index 9d419537e7..395e274647 100644 --- a/src/pudl/transform/ferc714.py +++ b/src/pudl/transform/ferc714.py @@ -579,10 +579,12 @@ def out_ferc714__yearly_planning_area_forecast_demand( return df + # EVERYTHING BELOW WILL COME OUT - JUST FOR LOCAL DEV # Get the value of DAGSTER_HOME from environment variables import os -dagster_home = os.getenv('DAGSTER_HOME') + +dagster_home = os.getenv("DAGSTER_HOME") # Define the file name file_name = "storage/raw_ferc714__yearly_planning_area_forecast_demand" @@ -591,4 +593,4 @@ def out_ferc714__yearly_planning_area_forecast_demand( file_path = os.path.join(dagster_home, file_name) # Load the pickle file into a DataFrame df = pd.read_pickle(file_path) -out_ferc714__yearly_planning_area_forecast_demand(df) \ No newline at end of file +out_ferc714__yearly_planning_area_forecast_demand(df) From 526ac08dee62d8afea7a99d1eebe8dc5bff28206 Mon Sep 17 00:00:00 2001 From: sam Date: Wed, 19 Jun 2024 08:43:10 -0400 Subject: [PATCH 06/22] Remove temp local dev logic --- src/pudl/transform/ferc714.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/pudl/transform/ferc714.py b/src/pudl/transform/ferc714.py index 9d419537e7..0f7ca56834 100644 --- a/src/pudl/transform/ferc714.py +++ b/src/pudl/transform/ferc714.py @@ -578,17 +578,3 @@ def out_ferc714__yearly_planning_area_forecast_demand( ) return df - -# EVERYTHING BELOW WILL COME OUT - JUST FOR LOCAL DEV -# Get the value of DAGSTER_HOME from environment variables -import os -dagster_home = os.getenv('DAGSTER_HOME') - -# Define the file name -file_name = "storage/raw_ferc714__yearly_planning_area_forecast_demand" - -# Construct the full file path -file_path = os.path.join(dagster_home, file_name) -# Load the pickle file into a DataFrame -df = pd.read_pickle(file_path) -out_ferc714__yearly_planning_area_forecast_demand(df) \ No newline at end of file From 70d170b8213ddd66d952b0998b776bd97d1b6657 Mon Sep 17 00:00:00 2001 From: sam Date: Sun, 23 Jun 2024 16:51:53 -0400 Subject: [PATCH 07/22] Correct job params and improve resource naming --- src/pudl/extract/ferc714.py | 2 +- src/pudl/metadata/resources/ferc714.py | 2 +- src/pudl/transform/ferc714.py | 18 ++++++++---------- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/src/pudl/extract/ferc714.py b/src/pudl/extract/ferc714.py index ed425bc5d4..6570881f3e 100644 --- a/src/pudl/extract/ferc714.py +++ b/src/pudl/extract/ferc714.py @@ -47,7 +47,7 @@ "name": "Part 3 Schedule 1 - Planning Area Description.csv", "encoding": "iso-8859-1", }, - "yearly_planning_area_forecast_demand": { + "yearly_planning_area_demand_forecast": { "name": "Part 3 Schedule 2 - Planning Area Forecast Demand.csv", "encoding": "utf-8", }, diff --git a/src/pudl/metadata/resources/ferc714.py b/src/pudl/metadata/resources/ferc714.py index 47b4ff9241..56a152aff3 100644 --- a/src/pudl/metadata/resources/ferc714.py +++ b/src/pudl/metadata/resources/ferc714.py @@ -97,7 +97,7 @@ "field_namespace": "ferc714", "etl_group": "outputs", }, - "out_ferc714__yearly_planning_area_forecast_demand": { + "core_ferc714__yearly_planning_area_demand_forecast": { "description": ( "10-year forecasted summer and winter peak demand and annual net energy per planning area. FERC Form 714, Part III, " "Schedule 2b." diff --git a/src/pudl/transform/ferc714.py b/src/pudl/transform/ferc714.py index 0f7ca56834..d24735b7bc 100644 --- a/src/pudl/transform/ferc714.py +++ b/src/pudl/transform/ferc714.py @@ -284,7 +284,7 @@ "report_yr": "report_year", "respondent_id": "respondent_id_ferc714", }, - "out_ferc714__yearly_planning_area_forecast_demand": { + "core_ferc714__yearly_planning_area_demand_forecast": { "respondent_id": "respondent_id_ferc714", "report_yr": "report_year", "plan_year": "forecast_year", @@ -544,12 +544,11 @@ def out_ferc714__hourly_planning_area_demand( @asset( - io_manager_key="parquet_io_manager", - op_tags={"memory-use": "high"}, # Should this be high? + io_manager_key="pudl_io_manager", compute_kind="pandas", ) -def out_ferc714__yearly_planning_area_forecast_demand( - raw_ferc714__yearly_planning_area_forecast_demand: pd.DataFrame, +def core_ferc714__yearly_planning_area_demand_forecast( + raw_ferc714__yearly_planning_area_demand_forecast: pd.DataFrame, ) -> pd.DataFrame: """Transform the yearly planning area forecast data per Planning Area. @@ -558,7 +557,7 @@ def out_ferc714__yearly_planning_area_forecast_demand( - Drop/rename columns. Args: - raw_ferc714__yearly_planning_area_forecast_demand: Raw table containing, + raw_ferc714__yearly_planning_area_demand_forecast: Raw table containing, for each year and each planning area, the forecasted summer and winter peak demand, in megawatts, and annual net energy for load, in megawatthours, for the next ten years. @@ -568,13 +567,12 @@ def out_ferc714__yearly_planning_area_forecast_demand( """ # Clean up columns df = _pre_process( - raw_ferc714__yearly_planning_area_forecast_demand, - table_name="out_ferc714__yearly_planning_area_forecast_demand", + raw_ferc714__yearly_planning_area_demand_forecast, + table_name="core_ferc714__yearly_planning_area_demand_forecast", ) # Check all data types and columns to ensure consistency with defined schema df = _post_process( - df, table_name="out_ferc714__yearly_planning_area_forecast_demand" + df, table_name="core_ferc714__yearly_planning_area_demand_forecast" ) - return df From 9925d214178b2d65c7f486a1890bb417d4d00283 Mon Sep 17 00:00:00 2001 From: sam Date: Wed, 10 Jul 2024 13:10:28 -0500 Subject: [PATCH 08/22] Add row count check --- ...98_updated_yearly_planning_area_demand_.py | 37 ++++++++++++++++ src/pudl/metadata/resources/ferc714.py | 2 +- src/pudl/transform/ferc714.py | 43 ++++++++++++++++++- 3 files changed, 80 insertions(+), 2 deletions(-) create mode 100644 migrations/versions/c4e8e24d4a98_updated_yearly_planning_area_demand_.py diff --git a/migrations/versions/c4e8e24d4a98_updated_yearly_planning_area_demand_.py b/migrations/versions/c4e8e24d4a98_updated_yearly_planning_area_demand_.py new file mode 100644 index 0000000000..ea2ad967cb --- /dev/null +++ b/migrations/versions/c4e8e24d4a98_updated_yearly_planning_area_demand_.py @@ -0,0 +1,37 @@ +"""Updated yearly_planning_area_demand_forecast schema + +Revision ID: c4e8e24d4a98 +Revises: b9b6cb1a5405 +Create Date: 2024-07-10 12:57:31.753979 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'c4e8e24d4a98' +down_revision = 'b9b6cb1a5405' +branch_labels = None +depends_on = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.create_table('core_ferc714__yearly_planning_area_demand_forecast', + sa.Column('respondent_id_ferc714', sa.Integer(), nullable=False, comment='FERC Form 714 respondent ID. Note that this ID does not correspond to FERC respondent IDs from other forms.'), + sa.Column('report_year', sa.Integer(), nullable=False, comment='Four-digit year in which the data was reported.'), + sa.Column('forecast_year', sa.Integer(), nullable=False, comment='Four-digit year that applies to a particular forecasted value.'), + sa.Column('summer_peak_demand_mw', sa.Float(), nullable=True, comment='The maximum hourly summer load (for the months of June through September) based on net energy for the system during the reporting year. Net energy for the system is the sum of energy an electric utility needs to satisfy their service area and includes full and partial wholesale requirements customers, and the losses experienced in delivery. The maximum hourly load is determined by the interval in which the 60-minute integrated demand is the greatest.'), + sa.Column('winter_peak_demand_mw', sa.Float(), nullable=True, comment='The maximum hourly winter load (for the months of January through March) based on net energy for the system during the reporting year. Net energy for the system is the sum of energy an electric utility needs to satisfy their service area and includes full and partial wholesale requirements customers, and the losses experienced in delivery. The maximum hourly load is determined by the interval in which the 60-minute integrated demand is the greatest.'), + sa.Column('net_demand_mwh', sa.Float(), nullable=True, comment='Net electricity demand for the specified period in megawatt-hours (MWh).'), + sa.ForeignKeyConstraint(['respondent_id_ferc714'], ['core_ferc714__respondent_id.respondent_id_ferc714'], name=op.f('fk_core_ferc714__yearly_planning_area_demand_forecast_respondent_id_ferc714_core_ferc714__respondent_id')), + sa.PrimaryKeyConstraint('respondent_id_ferc714', 'report_year', 'forecast_year', name=op.f('pk_core_ferc714__yearly_planning_area_demand_forecast')) + ) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('core_ferc714__yearly_planning_area_demand_forecast') + # ### end Alembic commands ### diff --git a/src/pudl/metadata/resources/ferc714.py b/src/pudl/metadata/resources/ferc714.py index 56a152aff3..583bfce9b1 100644 --- a/src/pudl/metadata/resources/ferc714.py +++ b/src/pudl/metadata/resources/ferc714.py @@ -116,7 +116,7 @@ "sources": ["ferc714"], "field_namespace": "ferc714", "etl_group": "ferc714", - "create_database_schema": False, + "create_database_schema": True, }, } """FERC Form 714 resource attributes by PUDL identifier (``resource.name``). diff --git a/src/pudl/transform/ferc714.py b/src/pudl/transform/ferc714.py index d24735b7bc..5cae5334d2 100644 --- a/src/pudl/transform/ferc714.py +++ b/src/pudl/transform/ferc714.py @@ -1,10 +1,13 @@ """Transformation of the FERC Form 714 data.""" +from dataclasses import dataclass + import re import numpy as np import pandas as pd -from dagster import asset +from dagster import AssetCheckResult, AssetChecksDefinition, asset, asset_check + import pudl.logging_helpers from pudl.metadata import PUDL_PACKAGE @@ -576,3 +579,41 @@ def core_ferc714__yearly_planning_area_demand_forecast( df, table_name="core_ferc714__yearly_planning_area_demand_forecast" ) return df + +@dataclass +class Ferc714CheckSpec: + """Define some simple checks that can run on FERC 714 assets.""" + + name: str + asset: str + num_rows_by_report_year: dict[int, int] + +check_specs = [ + Ferc714CheckSpec( + name="yearly_planning_area_demand_forecast_check_spec", + asset="core_ferc714__yearly_planning_area_demand_forecast", + num_rows_by_report_year={2019: 950, 2020: 950}, + ) +] + + +def make_check(spec: Ferc714CheckSpec) -> AssetChecksDefinition: + """Turn the Ferc714CheckSpec into an actual Dagster asset check.""" + + @asset_check(asset=spec.asset) + def _check(df): + errors = [] + for year, expected_rows in spec.num_rows_by_report_year.items(): + if (num_rows := len(df.loc[df.report_year == year])) != expected_rows: + errors.append( + f"Expected {expected_rows} for report year {year}, found {num_rows}" + ) + if errors: + return AssetCheckResult(passed=False, metadata={"errors": errors}) + + return AssetCheckResult(passed=True) + + return _check + + +_checks = [make_check(spec) for spec in check_specs] From a8142641b4fd792fcf9bb35584556b6163bef49d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 10 Jul 2024 18:11:09 +0000 Subject: [PATCH 09/22] [pre-commit.ci] auto fixes from pre-commit.com hooks For more information, see https://pre-commit.ci --- src/pudl/transform/ferc714.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pudl/transform/ferc714.py b/src/pudl/transform/ferc714.py index 5cae5334d2..6c046aa037 100644 --- a/src/pudl/transform/ferc714.py +++ b/src/pudl/transform/ferc714.py @@ -1,14 +1,12 @@ """Transformation of the FERC Form 714 data.""" -from dataclasses import dataclass - import re +from dataclasses import dataclass import numpy as np import pandas as pd from dagster import AssetCheckResult, AssetChecksDefinition, asset, asset_check - import pudl.logging_helpers from pudl.metadata import PUDL_PACKAGE @@ -580,6 +578,7 @@ def core_ferc714__yearly_planning_area_demand_forecast( ) return df + @dataclass class Ferc714CheckSpec: """Define some simple checks that can run on FERC 714 assets.""" @@ -588,6 +587,7 @@ class Ferc714CheckSpec: asset: str num_rows_by_report_year: dict[int, int] + check_specs = [ Ferc714CheckSpec( name="yearly_planning_area_demand_forecast_check_spec", From ec87d3329d51bf5b46e89534a93d32de72308faf Mon Sep 17 00:00:00 2001 From: sam Date: Sat, 13 Jul 2024 08:53:04 -0400 Subject: [PATCH 10/22] Add logic to handle duplicate report years --- src/pudl/transform/ferc714.py | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/pudl/transform/ferc714.py b/src/pudl/transform/ferc714.py index 5cae5334d2..6338a85351 100644 --- a/src/pudl/transform/ferc714.py +++ b/src/pudl/transform/ferc714.py @@ -574,6 +574,15 @@ def core_ferc714__yearly_planning_area_demand_forecast( table_name="core_ferc714__yearly_planning_area_demand_forecast", ) + # For any rows with non-unique respondent_id_ferc714/report_year/forecast_year, + # group and take the mean measures + # For the 2006-2020 data, there were only 20 such rows. In most cases, demand metrics were identical. + # But for some, demand metrics were different - thus the need to take the average. + logger.info("Removing non-unique report rows and taking the average of non-equal metrics.") + df = df.groupby( + ["respondent_id_ferc714", "report_year", "forecast_year"] + )[["summer_peak_demand_mw", "winter_peak_demand_mw", "net_demand_mwh"]].mean().reset_index() + # Check all data types and columns to ensure consistency with defined schema df = _post_process( df, table_name="core_ferc714__yearly_planning_area_demand_forecast" @@ -592,7 +601,23 @@ class Ferc714CheckSpec: Ferc714CheckSpec( name="yearly_planning_area_demand_forecast_check_spec", asset="core_ferc714__yearly_planning_area_demand_forecast", - num_rows_by_report_year={2019: 950, 2020: 950}, + num_rows_by_report_year = { + 2006: 1829, + 2007: 1570, + 2008: 1540, + 2009: 1269, + 2010: 1259, + 2011: 1210, + 2012: 1210, + 2013: 1192, + 2014: 1000, + 2015: 990, + 2016: 990, + 2017: 980, + 2018: 961, + 2019: 950, + 2020: 950 + }, ) ] From 77f2bfdf50eeba3e3b73ac63f4ca3ecefb27322b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 13 Jul 2024 12:53:37 +0000 Subject: [PATCH 11/22] [pre-commit.ci] auto fixes from pre-commit.com hooks For more information, see https://pre-commit.ci --- src/pudl/transform/ferc714.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/pudl/transform/ferc714.py b/src/pudl/transform/ferc714.py index 4b87b99145..67588cadcb 100644 --- a/src/pudl/transform/ferc714.py +++ b/src/pudl/transform/ferc714.py @@ -576,10 +576,16 @@ def core_ferc714__yearly_planning_area_demand_forecast( # group and take the mean measures # For the 2006-2020 data, there were only 20 such rows. In most cases, demand metrics were identical. # But for some, demand metrics were different - thus the need to take the average. - logger.info("Removing non-unique report rows and taking the average of non-equal metrics.") - df = df.groupby( - ["respondent_id_ferc714", "report_year", "forecast_year"] - )[["summer_peak_demand_mw", "winter_peak_demand_mw", "net_demand_mwh"]].mean().reset_index() + logger.info( + "Removing non-unique report rows and taking the average of non-equal metrics." + ) + df = ( + df.groupby(["respondent_id_ferc714", "report_year", "forecast_year"])[ + ["summer_peak_demand_mw", "winter_peak_demand_mw", "net_demand_mwh"] + ] + .mean() + .reset_index() + ) # Check all data types and columns to ensure consistency with defined schema df = _post_process( @@ -601,7 +607,7 @@ class Ferc714CheckSpec: Ferc714CheckSpec( name="yearly_planning_area_demand_forecast_check_spec", asset="core_ferc714__yearly_planning_area_demand_forecast", - num_rows_by_report_year = { + num_rows_by_report_year={ 2006: 1829, 2007: 1570, 2008: 1540, @@ -616,7 +622,7 @@ class Ferc714CheckSpec: 2017: 980, 2018: 961, 2019: 950, - 2020: 950 + 2020: 950, }, ) ] From 81e8852456557f45dc6f093e6973c78bcb89c544 Mon Sep 17 00:00:00 2001 From: sam Date: Sat, 13 Jul 2024 09:27:46 -0400 Subject: [PATCH 12/22] Add documentation --- src/pudl/transform/ferc714.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pudl/transform/ferc714.py b/src/pudl/transform/ferc714.py index 4b87b99145..15b98acc70 100644 --- a/src/pudl/transform/ferc714.py +++ b/src/pudl/transform/ferc714.py @@ -556,6 +556,7 @@ def core_ferc714__yearly_planning_area_demand_forecast( Transformations include: - Drop/rename columns. + - Remove duplicate rows and average out the metrics. Args: raw_ferc714__yearly_planning_area_demand_forecast: Raw table containing, From fd14e73920daedc3f9dd09b46541ef369071b04f Mon Sep 17 00:00:00 2001 From: sam Date: Sun, 14 Jul 2024 22:22:19 -0400 Subject: [PATCH 13/22] Update docs and check dupe rows removed --- docs/release_notes.rst | 6 ++++++ src/pudl/transform/ferc714.py | 16 +++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/docs/release_notes.rst b/docs/release_notes.rst index c2263a28d9..09e2615285 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -27,6 +27,12 @@ EIA 860 regarding energy storage and solar generators. See issue :issue:`3676` and PR :pr:`3681`. +FERC 714 + +* Added :ref:`core_ferc714__yearly_planning_area_demand_forecast` based on FERC +Form 714, Part III, Schedule 2b. Data includes forecasted demand and net energy load. +See issue :issue:`3519` and PR :pr:`3670`. + Data Cleaning ^^^^^^^^^^^^^ * When ``generator_operating_date`` values are too inconsistent to be harvested diff --git a/src/pudl/transform/ferc714.py b/src/pudl/transform/ferc714.py index 76fb44b98e..efe55f7c4e 100644 --- a/src/pudl/transform/ferc714.py +++ b/src/pudl/transform/ferc714.py @@ -580,6 +580,10 @@ def core_ferc714__yearly_planning_area_demand_forecast( logger.info( "Removing non-unique report rows and taking the average of non-equal metrics." ) + + # Grab the number of rows before duplicate cleanup + num_rows_before = len(df) + df = ( df.groupby(["respondent_id_ferc714", "report_year", "forecast_year"])[ ["summer_peak_demand_mw", "winter_peak_demand_mw", "net_demand_mwh"] @@ -588,6 +592,15 @@ def core_ferc714__yearly_planning_area_demand_forecast( .reset_index() ) + # Capture the number of rows after grouping + num_rows_after = len(df) + + # Add the number of duplicates removed as metadata + num_duplicates_removed = num_rows_before - num_rows_after + logger.info(f"Number of duplicate rows removed: {num_duplicates_removed}") + # Assert that number of removed rows meets expectation + assert (num_duplicates_removed == 20), f"Expected 20 duplicates removed, but found {num_duplicates_removed}" + # Check all data types and columns to ensure consistency with defined schema df = _post_process( df, table_name="core_ferc714__yearly_planning_area_demand_forecast" @@ -632,7 +645,7 @@ class Ferc714CheckSpec: def make_check(spec: Ferc714CheckSpec) -> AssetChecksDefinition: """Turn the Ferc714CheckSpec into an actual Dagster asset check.""" - @asset_check(asset=spec.asset) + @asset_check(asset=spec.asset, blocking=True) def _check(df): errors = [] for year, expected_rows in spec.num_rows_by_report_year.items(): @@ -640,6 +653,7 @@ def _check(df): errors.append( f"Expected {expected_rows} for report year {year}, found {num_rows}" ) + if errors: return AssetCheckResult(passed=False, metadata={"errors": errors}) From cda231fd280064d0902469ae0007ab77b2ad2dd5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Jul 2024 02:22:42 +0000 Subject: [PATCH 14/22] [pre-commit.ci] auto fixes from pre-commit.com hooks For more information, see https://pre-commit.ci --- src/pudl/transform/ferc714.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pudl/transform/ferc714.py b/src/pudl/transform/ferc714.py index efe55f7c4e..3e2218fdbc 100644 --- a/src/pudl/transform/ferc714.py +++ b/src/pudl/transform/ferc714.py @@ -599,7 +599,9 @@ def core_ferc714__yearly_planning_area_demand_forecast( num_duplicates_removed = num_rows_before - num_rows_after logger.info(f"Number of duplicate rows removed: {num_duplicates_removed}") # Assert that number of removed rows meets expectation - assert (num_duplicates_removed == 20), f"Expected 20 duplicates removed, but found {num_duplicates_removed}" + assert ( + num_duplicates_removed == 20 + ), f"Expected 20 duplicates removed, but found {num_duplicates_removed}" # Check all data types and columns to ensure consistency with defined schema df = _post_process( From fc55f477c383d9ba54457d5fabaff9886ca6ca83 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Mon, 15 Jul 2024 02:17:32 -0400 Subject: [PATCH 15/22] Apply suggestions from code review Fix some RST formatting issues in release notes --- docs/release_notes.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/release_notes.rst b/docs/release_notes.rst index 09e2615285..c9551545c1 100644 --- a/docs/release_notes.rst +++ b/docs/release_notes.rst @@ -28,10 +28,11 @@ EIA 860 :pr:`3681`. FERC 714 +~~~~~~~~ * Added :ref:`core_ferc714__yearly_planning_area_demand_forecast` based on FERC -Form 714, Part III, Schedule 2b. Data includes forecasted demand and net energy load. -See issue :issue:`3519` and PR :pr:`3670`. + Form 714, Part III, Schedule 2b. Data includes forecasted demand and net energy load. + See issue :issue:`3519` and PR :pr:`3670`. Data Cleaning ^^^^^^^^^^^^^ From c37912683b150d0659d6ec3544df2433823679bd Mon Sep 17 00:00:00 2001 From: sam Date: Tue, 16 Jul 2024 18:34:42 -0400 Subject: [PATCH 16/22] Add temp notebook for analysis --- notebooks/work-in-progress/ferc714-core.ipynb | 538 ++++++++++++++++++ 1 file changed, 538 insertions(+) create mode 100644 notebooks/work-in-progress/ferc714-core.ipynb diff --git a/notebooks/work-in-progress/ferc714-core.ipynb b/notebooks/work-in-progress/ferc714-core.ipynb new file mode 100644 index 0000000000..3b167f522c --- /dev/null +++ b/notebooks/work-in-progress/ferc714-core.ipynb @@ -0,0 +1,538 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setup" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "import pandas as pd\n", + "import pyarrow.parquet as pq\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load data\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# Put in the name of the file that you want to load\n", + "ferc714_yearly = \"core_ferc714__yearly_planning_area_demand_forecast\"" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "def path_finder(target_asset_name):\n", + " \"\"\"Returns the path to the target_asset_name and incorporates your local PUDL_OUTPUT value.\"\"\"\n", + " # Find the PUDL_OUTPUT dir path\n", + " bashrc_path = Path.home() / \".bashrc\"\n", + " pudl_output_filepath = None\n", + "\n", + " with Path.open(bashrc_path, \"r\") as file:\n", + " for line in file:\n", + " if line.startswith(\"export PUDL_OUTPUT=\"):\n", + " pudl_output_filepath = line.split(\"=\")[1].strip().strip('\"')\n", + " break\n", + "\n", + " if not pudl_output_filepath:\n", + " print(\"PUDL_OUTPUT not found in .bashrc\")\n", + " return \"\"\n", + "\n", + " # Using the PUDL_OUTPUT dir path, find the target file\n", + " target_asset_filepath = \"\"\n", + "\n", + " pudl_output_path = Path(pudl_output_filepath)\n", + "\n", + " for path in pudl_output_path.rglob(\"*\"):\n", + " if path.is_file() and path.stem == target_asset_name:\n", + " target_asset_filepath = str(path)\n", + " break\n", + "\n", + " print(f\"Target asset filepath: {target_asset_filepath}\")\n", + " return target_asset_filepath\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Target asset filepath: /Users/sam/Documents/pudl-data/pudl_output/parquet/core_ferc714__yearly_planning_area_demand_forecast.parquet\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
respondent_id_ferc714report_yearforecast_yearsummer_peak_demand_mwwinter_peak_demand_mwnet_demand_mwh
02200620071108.00.00.0
12200620081141.00.00.0
22200620091173.00.00.0
32200620101261.00.00.0
42200620111292.00.00.0
\n", + "
" + ], + "text/plain": [ + " respondent_id_ferc714 report_year forecast_year summer_peak_demand_mw \\\n", + "0 2 2006 2007 1108.0 \n", + "1 2 2006 2008 1141.0 \n", + "2 2 2006 2009 1173.0 \n", + "3 2 2006 2010 1261.0 \n", + "4 2 2006 2011 1292.0 \n", + "\n", + " winter_peak_demand_mw net_demand_mwh \n", + "0 0.0 0.0 \n", + "1 0.0 0.0 \n", + "2 0.0 0.0 \n", + "3 0.0 0.0 \n", + "4 0.0 0.0 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ferc714_yearly_path = path_finder(ferc714_yearly)\n", + "ferc714_yearly_table = pq.read_table(ferc714_yearly_path)\n", + "ferc714_yearly_df = ferc714_yearly_table.to_pandas()\n", + "ferc714_yearly_df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Initial checks" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Is the data complete?" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For this FERC 714 form, respondents were expected to provide 10 years' worth of forecasted demand.\n", + "Here we can see that not all respondents provided 10 years' worth each report year:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
respondent_id_ferc714report_yearforecast_year_count
17412520069
108823520099
177032120109
84921120139
93921920133
78720120181
\n", + "
" + ], + "text/plain": [ + " respondent_id_ferc714 report_year forecast_year_count\n", + "174 125 2006 9\n", + "1088 235 2009 9\n", + "1770 321 2010 9\n", + "849 211 2013 9\n", + "939 219 2013 3\n", + "787 201 2018 1" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "counts = ferc714_yearly_df.groupby([\"respondent_id_ferc714\", \"report_year\"]).size().reset_index(name=\"forecast_year_count\").sort_values(by=\"report_year\")\n", + "print(\"For this FERC 714 form, respondents were expected to provide 10 years' worth of forecasted demand.\")\n", + "print(\"Here we can see that not all respondents provided 10 years' worth each report year:\")\n", + "counts[counts[\"forecast_year_count\"] != 10]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Do we see any obvious anomalies?" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/0k/_f6292f54rd6y6x_3m47kq1h0000gp/T/ipykernel_71564/961098268.py:45: FutureWarning: The behavior of DataFrame concatenation with empty or all-NA entries is deprecated. In a future version, this will no longer exclude empty or all-NA columns when determining the result dtypes. To retain the old behavior, exclude the relevant entries before the concat operation.\n", + " anomalies = pd.concat([anomalies, pd.DataFrame({\n" + ] + } + ], + "source": [ + "# Define threshold for anomalies (e.g., more than 100% change)\n", + "threshold = 100.0\n", + "\n", + "# Apply the logic without using a function\n", + "grouped = ferc714_yearly_df.groupby([\"respondent_id_ferc714\", \"report_year\"])\n", + "\n", + "# Initialize an empty DataFrame to collect anomalies\n", + "anomalies = pd.DataFrame(columns=[\n", + " \"respondent_id_ferc714\", \"report_year\", \"forecast_year\",\n", + " \"metric_type\", \"percentage_change\", \"value_in_prior_year\", \"value_in_this_forecast_year\"\n", + "])\n", + "\n", + "for key, group in grouped:\n", + " # Sort by forecast_year\n", + " group = group.sort_values(by=\"forecast_year\")\n", + "\n", + " # Calculate percentage change\n", + " group[\"summer_peak_demand_pct_change\"] = group[\"summer_peak_demand_mw\"].pct_change() * 100\n", + " group[\"winter_peak_demand_pct_change\"] = group[\"winter_peak_demand_mw\"].pct_change() * 100\n", + " group[\"net_demand_pct_change\"] = group[\"net_demand_mwh\"].pct_change() * 100\n", + "\n", + " # Check for anomalies and append to the result DataFrame\n", + " for index, row in group.iterrows():\n", + " if abs(row[\"summer_peak_demand_pct_change\"]) > threshold:\n", + " anomalies = pd.concat([anomalies, pd.DataFrame({\n", + " \"respondent_id_ferc714\": [row[\"respondent_id_ferc714\"]],\n", + " \"report_year\": [row[\"report_year\"]],\n", + " \"forecast_year\": [row[\"forecast_year\"]],\n", + " \"metric_type\": [\"summer_peak_demand_mw\"],\n", + " \"percentage_change\": [row[\"summer_peak_demand_pct_change\"]],\n", + " \"value_in_prior_year\": [group.loc[index-1, \"summer_peak_demand_mw\"]] if index > 0 else [None],\n", + " \"value_in_this_forecast_year\": [row[\"summer_peak_demand_mw\"]]\n", + " })], ignore_index=True)\n", + " if abs(row[\"winter_peak_demand_pct_change\"]) > threshold:\n", + " anomalies = pd.concat([anomalies, pd.DataFrame({\n", + " \"respondent_id_ferc714\": [row[\"respondent_id_ferc714\"]],\n", + " \"report_year\": [row[\"report_year\"]],\n", + " \"forecast_year\": [row[\"forecast_year\"]],\n", + " \"metric_type\": [\"winter_peak_demand_mw\"],\n", + " \"percentage_change\": [row[\"winter_peak_demand_pct_change\"]],\n", + " \"value_in_prior_year\": [group.loc[index-1, \"winter_peak_demand_mw\"]] if index > 0 else [None],\n", + " \"value_in_this_forecast_year\": [row[\"winter_peak_demand_mw\"]]\n", + " })], ignore_index=True)\n", + " if abs(row[\"net_demand_pct_change\"]) > threshold:\n", + " anomalies = pd.concat([anomalies, pd.DataFrame({\n", + " \"respondent_id_ferc714\": [row[\"respondent_id_ferc714\"]],\n", + " \"report_year\": [row[\"report_year\"]],\n", + " \"forecast_year\": [row[\"forecast_year\"]],\n", + " \"metric_type\": [\"net_demand_mwh\"],\n", + " \"percentage_change\": [row[\"net_demand_pct_change\"]],\n", + " \"value_in_prior_year\": [group.loc[index-1, \"net_demand_mwh\"]] if index > 0 else [None],\n", + " \"value_in_this_forecast_year\": [row[\"net_demand_mwh\"]]\n", + " })], ignore_index=True)\n", + "\n", + "# Reset index for the final anomalies DataFrame\n", + "anomalies = anomalies.reset_index(drop=True)\n", + "anomalies[\"respondent_id_ferc714\"] = anomalies[\"respondent_id_ferc714\"].astype(int)\n", + "anomalies[\"report_year\"] = anomalies[\"report_year\"].astype(int)\n", + "anomalies[\"forecast_year\"] = anomalies[\"forecast_year\"].astype(int)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
respondent_id_ferc714report_yearforecast_yearmetric_typepercentage_changevalue_in_prior_yearvalue_in_this_forecast_year
012020062013net_demand_mwh514.3953251928828.011850629.0
112520172024winter_peak_demand_mw944.2966313384.035339.0
213420072009winter_peak_demand_mw9786.792969106.010480.0
315920062008winter_peak_demand_mwinf0.02848.0
415920062008net_demand_mwhinf0.013399136.0
\n", + "
" + ], + "text/plain": [ + " respondent_id_ferc714 report_year forecast_year metric_type \\\n", + "0 120 2006 2013 net_demand_mwh \n", + "1 125 2017 2024 winter_peak_demand_mw \n", + "2 134 2007 2009 winter_peak_demand_mw \n", + "3 159 2006 2008 winter_peak_demand_mw \n", + "4 159 2006 2008 net_demand_mwh \n", + "\n", + " percentage_change value_in_prior_year value_in_this_forecast_year \n", + "0 514.395325 1928828.0 11850629.0 \n", + "1 944.296631 3384.0 35339.0 \n", + "2 9786.792969 106.0 10480.0 \n", + "3 inf 0.0 2848.0 \n", + "4 inf 0.0 13399136.0 " + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "anomalies.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pudl-dev", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 6ddbcdc11e33109354fe5005f86bdfb76ad38bd3 Mon Sep 17 00:00:00 2001 From: sam Date: Thu, 18 Jul 2024 19:07:25 -0400 Subject: [PATCH 17/22] Fix migration --- migrations/versions/0f594b84fbe7_.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 migrations/versions/0f594b84fbe7_.py diff --git a/migrations/versions/0f594b84fbe7_.py b/migrations/versions/0f594b84fbe7_.py new file mode 100644 index 0000000000..9a53468c09 --- /dev/null +++ b/migrations/versions/0f594b84fbe7_.py @@ -0,0 +1,24 @@ +"""empty message + +Revision ID: 0f594b84fbe7 +Revises: 2c52a938f5cc, c4e8e24d4a98 +Create Date: 2024-07-18 19:06:38.337919 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '0f594b84fbe7' +down_revision = ('2c52a938f5cc', 'c4e8e24d4a98') +branch_labels = None +depends_on = None + + +def upgrade() -> None: + pass + + +def downgrade() -> None: + pass From 3bceadab9031dc00b4302bafeb0fb260e667a7ae Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Fri, 19 Jul 2024 00:01:13 -0600 Subject: [PATCH 18/22] Check upper bound on duplicate rows, not exact number. --- src/pudl/transform/ferc714.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pudl/transform/ferc714.py b/src/pudl/transform/ferc714.py index 3e2218fdbc..5c1ee2839b 100644 --- a/src/pudl/transform/ferc714.py +++ b/src/pudl/transform/ferc714.py @@ -600,7 +600,7 @@ def core_ferc714__yearly_planning_area_demand_forecast( logger.info(f"Number of duplicate rows removed: {num_duplicates_removed}") # Assert that number of removed rows meets expectation assert ( - num_duplicates_removed == 20 + num_duplicates_removed <= 20 ), f"Expected 20 duplicates removed, but found {num_duplicates_removed}" # Check all data types and columns to ensure consistency with defined schema From 8f5f4478a1d98ff3a3706379732e68919ee11b01 Mon Sep 17 00:00:00 2001 From: sam Date: Fri, 19 Jul 2024 11:47:29 -0400 Subject: [PATCH 19/22] Modify assertion --- src/pudl/transform/ferc714.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pudl/transform/ferc714.py b/src/pudl/transform/ferc714.py index 3e2218fdbc..01b15e6932 100644 --- a/src/pudl/transform/ferc714.py +++ b/src/pudl/transform/ferc714.py @@ -600,8 +600,8 @@ def core_ferc714__yearly_planning_area_demand_forecast( logger.info(f"Number of duplicate rows removed: {num_duplicates_removed}") # Assert that number of removed rows meets expectation assert ( - num_duplicates_removed == 20 - ), f"Expected 20 duplicates removed, but found {num_duplicates_removed}" + num_duplicates_removed <= 20 + ), f"Expected no more than 20 duplicates removed, but found {num_duplicates_removed}" # Check all data types and columns to ensure consistency with defined schema df = _post_process( From b2fffd32688afdbacfa6239cc75ef219c7ec618b Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Fri, 19 Jul 2024 10:30:00 -0600 Subject: [PATCH 20/22] Disable build-distribution check on forks. --- .github/workflows/release.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5b845cf479..59eeea19c3 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -5,6 +5,8 @@ on: push jobs: build-distribution: + name: Build catalystcoop.pudl distribution for PyPI + if: github.repository == ‘catalyst-cooperative/pudl’ runs-on: ubuntu-latest steps: - name: Checkout source From b95c868cd89d38e76f147d1c1ffa7077b5ac5889 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Fri, 19 Jul 2024 20:35:56 -0600 Subject: [PATCH 21/22] Empty commit to retrigger GitHub checks From 0aa9d110e33f5d8e13170e5fbed4891a1cac45a4 Mon Sep 17 00:00:00 2001 From: Zane Selvans Date: Fri, 19 Jul 2024 20:49:33 -0600 Subject: [PATCH 22/22] Remove conditional around build-distribution as it didn't work. --- .github/workflows/release.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 59eeea19c3..2e2e64dbee 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -6,7 +6,6 @@ on: push jobs: build-distribution: name: Build catalystcoop.pudl distribution for PyPI - if: github.repository == ‘catalyst-cooperative/pudl’ runs-on: ubuntu-latest steps: - name: Checkout source