#860 display warning/error when csv input contains an observation valu…

…e column titled `value` (#882) * New exception created and external docs updated * Purl link added
ONSdigital · Aug 30, 2023 · a79d05a · a79d05a
1 parent 5361fcf
commit a79d05a
Show file tree

Hide file tree

Showing 6 changed files with 34 additions and 58 deletions.
diff --git a/external-docs/docs/guides/errors/inspect-command-errors/index.md b/external-docs/docs/guides/errors/inspect-command-errors/index.md
@@ -14,3 +14,4 @@
 * [An Error Occurred When Processing Dataframe](./error-processing-dataframe)
 * [Failed to Convert Dataframe to String](./dataframe-to-string-convert-failed.md)
 * [Feature Not Yet Supported](./feature-not-supported.md)
+* [Invalid Observation Value Column Entitled "Value"](./invalid-obs-val-col-entitled-value.md)
diff --git a/...docs/guides/errors/inspect-command-errors/invalid-obs-val-col-entitled-value.md b/...docs/guides/errors/inspect-command-errors/invalid-obs-val-col-entitled-value.md
@@ -0,0 +1,9 @@
+# Error - Invalid observation value column entitled "Value"
+
+## When it occurs
+
+In a pivoted shape cube, observation value columns must not be entitled "Value".
+
+## How to fix
+
+If your data set is in the pivoted shape, rename the observation value column(s) to something other than "Value". See the [pivoted shape](../../shape-data/pivoted-shape.md) documentation for more details.
diff --git a/external-docs/docs/guides/shape-data/pivoted-shape.md b/external-docs/docs/guides/shape-data/pivoted-shape.md
@@ -4,6 +4,10 @@
 
 See [Converting to pivoted shape](./shape-conversion.md#converting-to-the-pivoted-shape) for instructions on how to convert the shape of your data in Python and R.
 
+## Observation value column titles
+
+In a pivoted shape data set, observation value columns must not be entitled "Value". If you attempt to inspect a data cube built using csvcubed where one or more observation value columns are entitled "Value", an [error](../../guides/errors/inspect-command-errors/invalid-obs-val-col-entitled-value.md) will be raised.
+
 ## Single Measure
 
 The [standard shape](./standard-shape.md) is flexible but has a lot of redundancy which can often be removed by using the more concise pivoted form. Our data set on the distribution of the number of Arthur's Bakes stores can be expressed in the pivoted shape as follows:

diff --git a/src/csvcubed/models/csvcubedexception.py b/src/csvcubed/models/csvcubedexception.py
@@ -68,6 +68,8 @@ class CsvcubedExceptionMsges(Enum):
 
     InvalidNumOfDSDComponentsForObsValColTitle = "There should be only 1 component for the observation value column with title '{obs_val_col_title}', but found {num_of_components}."
 
+    InvalidObsValColTitle = "The title of an observation value column in a pivoted shape cube cannot be 'Value'. Please rename the column."
+
 
 class CsvcubedExceptionUrls(Enum):
     """
@@ -127,6 +129,7 @@ class CsvcubedExceptionUrls(Enum):
     InvalidNumOfDSDComponentsForObsValColTitle = (
         "http://purl.org/csv-cubed/err/invalid-num-of-dsd-comps-for-obs-val-col"
     )
+    InvalidObsValColTitle = "http://purl.org/csv-cubed/err/obs-val-col-entitled-value"
 
 
 class CsvcubedException(Exception, HasErrorUrl, ABC):
@@ -418,3 +421,14 @@ def __init__(self, obs_val_col_title: str, num_of_components: int):
     @classmethod
     def get_error_url(cls) -> str:
         return CsvcubedExceptionUrls.InvalidNumOfDSDComponentsForObsValColTitle.value
+
+
+class InvalidObsValColTitleException(CsvcubedException):
+    """Class representing the InvalidObsValColTitleException model."""
+
+    def __init__(self):
+        super().__init__(CsvcubedExceptionMsges.InvalidObsValColTitle.value)
+
+    @classmethod
+    def get_error_url(cls) -> str:
+        return CsvcubedExceptionUrls.InvalidObsValColTitle.value
diff --git a/src/csvcubed/utils/csvdataset.py b/src/csvcubed/utils/csvdataset.py
@@ -20,6 +20,7 @@
     InvalidNumberOfRecordsException,
     InvalidNumOfDSDComponentsForObsValColTitleException,
     InvalidObservationColumnTitle,
+    InvalidObsValColTitleException,
     InvalidUnitColumnDefinition,
 )
 from csvcubed.models.cube.cube_shape import CubeShape
@@ -306,30 +307,19 @@ def _melt_data_set(
     ]
     id_cols = list(set(data_set.columns) - set(value_cols))
 
-    # Checking for any columns with the title "Value" and changing the value_name
-    # parameter passed to the melt function to a random string so that we don't
-    # trigger a pandas ValueError.
-    value_name = "Value"
-    rand_value_name = f"Value_{str(uuid1())}"
-    for col_title in value_cols:
-        if col_title == "Value":
-            value_name = rand_value_name
+    # Raise an exception if any observation columns are entitled 'Value'
+    if "Value" in value_cols:
+        raise InvalidObsValColTitleException()
 
     # Melting the data set using pandas melt function.
-    melted_df = pd.melt(
+    return pd.melt(
         data_set,
         id_vars=id_cols,
         value_vars=value_cols,
-        value_name=value_name,
+        value_name="Value",
         var_name="Observation Value",
     )
 
-    # Renaming columns in the returned melted df to their original title "Value"
-    if value_name == rand_value_name:
-        melted_df.rename(columns={rand_value_name: "Value"}, inplace=True)
-
-    return melted_df
-
 
 def _get_unit_measure_col_for_standard_shape_cube(
     qube_components: List[QubeComponentResult],

diff --git a/tests/unit/inspect/test_inspectdatasetmanager.py b/tests/unit/inspect/test_inspectdatasetmanager.py
@@ -840,45 +840,3 @@ def test_get_concepts_hierarchy_info_hierarchy_with_depth_more_than_one():
     assert isinstance(result.tree, Tree)
     assert result.tree.depth() == 2
     assert len(result.tree.all_nodes_itr()) == 10
-
-
-def test_melt_works_when_column_titled_value_exists():
-    """
-    When a df with a column titled "Value" is passed to the melt function in
-    pandas 2.0 a ValueError is produced because of the column title. This test
-    uses a dataset with a "Value" titled column and should be melted when
-    transform_dataset_to_canonical_shape is called. In this case, the "value_name
-    parameter of melt() is given a different string (default is "Value") and then
-    we rename this column back to "Value" in the returned melted df.
-    """
-    csvw_metadata_json_path = (
-        _test_case_base_dir
-        / "multi-unit_single-measure"
-        / "final-uk-greenhouse-gas-emissions-national-statistics-1990-to-2019.csv-metadata.json"
-    )
-
-    data_cube_repository = get_data_cube_repository(csvw_metadata_json_path)
-
-    (dataset, qube_components, csv_url) = get_arguments_qb_dataset(data_cube_repository)
-
-    (
-        canonical_shape_dataset,
-        measure_col,
-        unit_col,
-    ) = transform_dataset_to_canonical_shape(
-        data_cube_repository,
-        dataset,
-        csv_url,
-        qube_components,
-    )
-
-    result: DatasetObservationsByMeasureUnitInfoResult = get_dataset_val_counts_info(
-        canonical_shape_dataset, measure_col, unit_col
-    )
-
-    assert result is not None
-    assert set(dataset.columns).issubset(set(canonical_shape_dataset.columns))
-    assert_frame_equal(
-        result.by_measure_and_unit_val_counts_df,
-        _expected_by_measure_and_unit_val_counts_df_with_column_titled_value,
-    )