Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reformatted dataprofiler/profilers/float_column_profile.py and dataprofiler/profilers/text_column_profile.py using flake8. #549

Merged
merged 2 commits into from
Jul 21, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 33 additions & 22 deletions dataprofiler/profilers/float_column_profile.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
"""
This is the float-column profiler module.

This profiler handles float columns.
"""

import copy
import math
import re

import numpy as np
Expand All @@ -12,15 +17,17 @@

class FloatColumn(NumericStatsMixin, BaseColumnPrimitiveTypeProfiler):
"""
Float column profile mixin with of numerical stats. Represents a column in
the dataset which is a float column.
Float column profile mixin with numerical stats.
taylorfturner marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This sentence is weird


Represents a column in the dataset which is a float column.
"""

type = "float"

def __init__(self, name, options=None):
"""
Initialization of column base properties and itself.
Initialize column base properties and itself.

:param name: Name of the data
:type name: String
:param options: Options for the float column
Expand Down Expand Up @@ -57,7 +64,8 @@ def __init__(self, name, options=None):

def __add__(self, other):
"""
Merges the properties of two FloatColumn profiles
Merge the properties of two FloatColumn profiles.

:param self: first profile
:param other: second profile
:type self: FloatColumn
Expand Down Expand Up @@ -116,7 +124,7 @@ def __add__(self, other):

def diff(self, other_profile, options=None):
"""
Finds the differences for FloatColumns.
Find the differences for FloatColumns.

:param other_profile: profile to find the difference with
:type other_profile: FloatColumn
Expand All @@ -135,9 +143,7 @@ def diff(self, other_profile, options=None):
return differences

def report(self, remove_disabled_flag=False):
"""Report on profile attribute of the class and pop value
from self.profile if key not in self.__calculations
"""
"""Report profile attribute of class; potentially pop val from self.profile."""
calcs_dict_keys = self._FloatColumn__calculations.keys()
profile = self.profile

Expand All @@ -154,7 +160,8 @@ def report(self, remove_disabled_flag=False):
@property
def profile(self):
"""
Property for profile. Returns the profile of the column.
Return the profile of the column.

:return:
"""
profile = NumericStatsMixin.profile(self)
Expand All @@ -181,8 +188,8 @@ def profile(self):
@property
def precision(self):
"""
Property reporting statistics on the significant figures of each
element in the data.
Report statistics on the significant figures of each element in the data.

:return: Precision statistics
:rtype: dict
"""
Expand Down Expand Up @@ -221,7 +228,8 @@ def precision(self):
@property
def data_type_ratio(self):
"""
Calculates the ratio of samples which match this data type.
Calculate the ratio of samples which match this data type.

:return: ratio of data type
:rtype: float
"""
Expand All @@ -232,7 +240,7 @@ def data_type_ratio(self):
@classmethod
def _get_float_precision(cls, df_series_clean, sample_ratio=None):
"""
Determines the precision of the numeric value.
Determine the precision of the numeric value.

:param df_series_clean: df series with nulls removed, assumes all values
are floats as well
Expand Down Expand Up @@ -278,8 +286,9 @@ def _get_float_precision(cls, df_series_clean, sample_ratio=None):
@classmethod
def _is_each_row_float(cls, df_series):
"""
Determines if each value in a dataframe is a float. Integers and NaNs
can be considered a float.
Determine if each value in a dataframe is a float.

Integers and NaNs can be considered a float.
e.g.
For column [1, 1, 1] returns [True, True, True]
For column [1.0, np.NaN, 1.0] returns [True, True, True]
Expand All @@ -298,7 +307,7 @@ def _update_precision(
self, df_series, prev_dependent_properties, subset_properties
):
"""
Updates the precision value of the column.
Update the precision value of the column.

:param prev_dependent_properties: Contains all the previous properties
that the calculations depend on.
Expand Down Expand Up @@ -346,8 +355,8 @@ def _update_precision(

def _update_helper(self, df_series_clean, profile):
"""
Method for updating the column profile properties with a cleaned
dataset and the known profile of the dataset.
Update column profile properties with cleaned dataset and its known profile.

:param df_series_clean: df series with nulls removed
:type df_series_clean: pandas.core.series.Series
:param profile: float profile dictionary
Expand All @@ -362,8 +371,9 @@ def _update_numeric_stats(
self, df_series, prev_dependent_properties, subset_properties
):
"""
Calls the numeric stats update function. This is a wrapper to allow
for modularity.
Call the numeric stats update function.

This is a wrapper to allow for modularity.
:param prev_dependent_properties: Contains all the previous properties
that the calculations depend on.
:type prev_dependent_properties: Dict
Expand All @@ -378,7 +388,8 @@ def _update_numeric_stats(

def update(self, df_series):
"""
Updates the column profile.
Update the column profile.

:param df_series: df series
:type df_series: pandas.core.series.Series
:return: None
Expand Down
34 changes: 19 additions & 15 deletions dataprofiler/profilers/text_column_profile.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
"""
This is the text-column profiler module.

This profiler handles text columns.
"""

import itertools

from . import utils
Expand All @@ -8,15 +14,16 @@

class TextColumn(NumericStatsMixin, BaseColumnPrimitiveTypeProfiler):
"""
Text column profile subclass of BaseColumnProfiler. Represents a column in
the dataset which is a text column.
Text column profile subclass of BaseColumnProfiler.

Represents a column in the dataset which is a text column.
"""

type = "text"

def __init__(self, name, options=None):
"""
Initialization of column base properties and itself.
Initialize column base properties and itself.

:param name: Name of the data
:type name: String
Expand All @@ -35,7 +42,7 @@ def __init__(self, name, options=None):

def __add__(self, other):
"""
Merges the properties of two TextColumn profiles
Merge properties of two TextColumn profiles.

:param self: first profile
:param other: second profile
Expand All @@ -60,9 +67,7 @@ def __add__(self, other):
return merged_profile

def report(self, remove_disabled_flag=False):
"""Report on profile attribute of the class and pop value
from self.profile if key not in self.__calculations
"""
"""Report profile attribute of class; potentially pop val from self.profile."""
calcs_dict_keys = self._TextColumn__calculations.keys()
profile = self.profile

Expand All @@ -79,7 +84,7 @@ def report(self, remove_disabled_flag=False):
@property
def profile(self):
"""
Property for profile. Returns the profile of the column.
Return the profile of the column.

:return:
"""
Expand All @@ -93,7 +98,7 @@ def profile(self):

def diff(self, other_profile, options=None):
"""
Finds the differences for text columns
Find the differences for text columns.

:param other_profile: profile to find the difference with
:type other_profile: TextColumn Profile
Expand All @@ -108,7 +113,8 @@ def diff(self, other_profile, options=None):
@property
def data_type_ratio(self):
"""
Calculates the ratio of samples which match this data type.
Calculate the ratio of samples which match this data type.

NOTE: all values can be considered string so always returns 1 in this
case.

Expand All @@ -122,7 +128,7 @@ def _update_vocab(
self, data, prev_dependent_properties=None, subset_properties=None
):
"""
Finds the unique vocabulary used in the text column.
Find the unique vocabulary used in the text column.

:param data: list or array of data from which to extract vocab
:type data: Union[list, numpy.array, pandas.DataFrame]
Expand All @@ -134,14 +140,12 @@ def _update_vocab(
:type subset_properties: dict
:return: None
"""

data_flat = list(itertools.chain(*data))
self.vocab = utils._combine_unique_sets(self.vocab, data_flat)

def _update_helper(self, df_series_clean, profile):
"""
Method for updating the column profile properties with a cleaned
dataset and the known null parameters of the dataset.
Update col profile properties with clean dataset and its known null parameters.

:param df_series_clean: df series with nulls removed
:type df_series_clean: pandas.core.series.Series
Expand All @@ -158,7 +162,7 @@ def _update_helper(self, df_series_clean, profile):

def update(self, df_series):
"""
Updates the column profile.
Update the column profile.

:param df_series: df series
:type df_series: pandas.core.series.Series
Expand Down