capitalone · taylorfturner · Jul 21, 2022 · Jul 21, 2022 · Jul 21, 2022 · ksneab7
@@ -1,5 +1,10 @@
+"""
+This is the float-column profiler module.
+
+This profiler handles float columns.
+"""
+
 import copy
-import math
 import re
 
 import numpy as np
@@ -12,15 +17,17 @@
 
 class FloatColumn(NumericStatsMixin, BaseColumnPrimitiveTypeProfiler):
     """
-    Float column profile mixin with of numerical stats. Represents a column in
-    the dataset which is a float column.
+    Float column profile mixin with numerical stats.
+
+    Represents a column in the dataset which is a float column.
     """
 
     type = "float"
 
     def __init__(self, name, options=None):
         """
-        Initialization of column base properties and itself.
+        Initialize column base properties and itself.
+
         :param name: Name of the data
         :type name: String
         :param options: Options for the float column
@@ -57,7 +64,8 @@ def __init__(self, name, options=None):
 
     def __add__(self, other):
         """
-        Merges the properties of two FloatColumn profiles
+        Merge the properties of two FloatColumn profiles.
+
         :param self: first profile
         :param other: second profile
         :type self: FloatColumn
@@ -116,7 +124,7 @@ def __add__(self, other):
 
     def diff(self, other_profile, options=None):
         """
-        Finds the differences for FloatColumns.
+        Find the differences for FloatColumns.
 
         :param other_profile: profile to find the difference with
         :type other_profile: FloatColumn
@@ -135,9 +143,7 @@ def diff(self, other_profile, options=None):
         return differences
 
     def report(self, remove_disabled_flag=False):
-        """Report on profile attribute of the class and pop value
-        from self.profile if key not in self.__calculations
-        """
+        """Report profile attribute of class; may pop values from self.profile."""
         calcs_dict_keys = self._FloatColumn__calculations.keys()
         profile = self.profile
 
@@ -154,7 +160,8 @@ def report(self, remove_disabled_flag=False):
     @property
     def profile(self):
         """
-        Property for profile. Returns the profile of the column.
+        Return the profile of the column.
+
         :return:
         """
         profile = NumericStatsMixin.profile(self)
@@ -181,8 +188,8 @@ def profile(self):
     @property
     def precision(self):
         """
-        Property reporting statistics on the significant figures of each
-        element in the data.
+        Report statistics on the significant figures of each element in the data.
+
         :return: Precision statistics
         :rtype: dict
         """
@@ -221,7 +228,8 @@ def precision(self):
     @property
     def data_type_ratio(self):
         """
-        Calculates the ratio of samples which match this data type.
+        Calculate the ratio of samples which match this data type.
+
         :return: ratio of data type
         :rtype: float
         """
@@ -232,7 +240,7 @@ def data_type_ratio(self):
     @classmethod
     def _get_float_precision(cls, df_series_clean, sample_ratio=None):
         """
-        Determines the precision of the numeric value.
+        Determine the precision of the numeric value.
 
         :param df_series_clean: df series with nulls removed, assumes all values
             are floats as well
@@ -278,8 +286,9 @@ def _get_float_precision(cls, df_series_clean, sample_ratio=None):
     @classmethod
     def _is_each_row_float(cls, df_series):
         """
-        Determines if each value in a dataframe is a float. Integers and NaNs
-        can be considered a float.
+        Determine if each value in a dataframe is a float.
+
+        Integers and NaNs can be considered a float.
         e.g.
         For column [1, 1, 1] returns [True, True, True]
         For column [1.0, np.NaN, 1.0] returns [True, True, True]
@@ -298,7 +307,7 @@ def _update_precision(
         self, df_series, prev_dependent_properties, subset_properties
     ):
         """
-        Updates the precision value of the column.
+        Update the precision value of the column.
 
         :param prev_dependent_properties: Contains all the previous properties
         that the calculations depend on.
@@ -346,8 +355,8 @@ def _update_precision(
 
     def _update_helper(self, df_series_clean, profile):
         """
-        Method for updating the column profile properties with a cleaned
-        dataset and the known profile of the dataset.
+        Update column profile properties with cleaned dataset and its known profile.
+
         :param df_series_clean: df series with nulls removed
         :type df_series_clean: pandas.core.series.Series
         :param profile: float profile dictionary
@@ -362,8 +371,9 @@ def _update_numeric_stats(
         self, df_series, prev_dependent_properties, subset_properties
     ):
         """
-        Calls the numeric stats update function. This is a wrapper to allow
-        for modularity.
+        Call the numeric stats update function.
+
+        This is a wrapper to allow for modularity.
         :param prev_dependent_properties: Contains all the previous properties
         that the calculations depend on.
         :type prev_dependent_properties: Dict
@@ -378,7 +388,8 @@ def _update_numeric_stats(
 
     def update(self, df_series):
         """
-        Updates the column profile.
+        Update the column profile.
+
         :param df_series: df series
         :type df_series: pandas.core.series.Series
         :return: None

@@ -1,3 +1,9 @@
+"""
+This is the text-column profiler module.
+
+This profiler handles text columns.
+"""
+
 import itertools
 
 from . import utils
@@ -8,15 +14,16 @@
 
 class TextColumn(NumericStatsMixin, BaseColumnPrimitiveTypeProfiler):
     """
-    Text column profile subclass of BaseColumnProfiler. Represents a column in
-    the dataset which is a text column.
+    Text column profile subclass of BaseColumnProfiler.
+
+    Represents a column in the dataset which is a text column.
     """
 
     type = "text"
 
     def __init__(self, name, options=None):
         """
-        Initialization of column base properties and itself.
+        Initialize column base properties and itself.
 
         :param name: Name of the data
         :type name: String
@@ -35,7 +42,7 @@ def __init__(self, name, options=None):
 
     def __add__(self, other):
         """
-        Merges the properties of two TextColumn profiles
+        Merge properties of two TextColumn profiles.
 
         :param self: first profile
         :param other: second profile
@@ -60,9 +67,7 @@ def __add__(self, other):
         return merged_profile
 
     def report(self, remove_disabled_flag=False):
-        """Report on profile attribute of the class and pop value
-        from self.profile if key not in self.__calculations
-        """
+        """Report profile attribute of class; may pop values from self.profile."""
         calcs_dict_keys = self._TextColumn__calculations.keys()
         profile = self.profile
 
@@ -79,7 +84,7 @@ def report(self, remove_disabled_flag=False):
     @property
     def profile(self):
         """
-        Property for profile. Returns the profile of the column.
+        Return the profile of the column.
 
         :return:
         """
@@ -93,7 +98,7 @@ def profile(self):
 
     def diff(self, other_profile, options=None):
         """
-        Finds the differences for text columns
+        Find the differences for text columns.
 
         :param other_profile: profile to find the difference with
         :type other_profile: TextColumn Profile
@@ -108,7 +113,8 @@ def diff(self, other_profile, options=None):
     @property
     def data_type_ratio(self):
         """
-        Calculates the ratio of samples which match this data type.
+        Calculate the ratio of samples which match this data type.
+
         NOTE: all values can be considered string so always returns 1 in this
         case.
 
@@ -122,7 +128,7 @@ def _update_vocab(
         self, data, prev_dependent_properties=None, subset_properties=None
     ):
         """
-        Finds the unique vocabulary used in the text column.
+        Find the unique vocabulary used in the text column.
 
         :param data: list or array of data from which to extract vocab
         :type data: Union[list, numpy.array, pandas.DataFrame]
@@ -134,14 +140,12 @@ def _update_vocab(
         :type subset_properties: dict
         :return: None
         """
-
         data_flat = list(itertools.chain(*data))
         self.vocab = utils._combine_unique_sets(self.vocab, data_flat)
 
     def _update_helper(self, df_series_clean, profile):
         """
-        Method for updating the column profile properties with a cleaned
-        dataset and the known null parameters of the dataset.
+        Update column profile properties with clean dataset and known null parameters.
 
         :param df_series_clean: df series with nulls removed
         :type df_series_clean: pandas.core.series.Series
@@ -158,7 +162,7 @@ def _update_helper(self, df_series_clean, profile):
 
     def update(self, df_series):
         """
-        Updates the column profile.
+        Update the column profile.
 
         :param df_series: df series
         :type df_series: pandas.core.series.Series