Skip to content

Commit

Permalink
feat: dq_all_val_in_nutrition_are_identical (#9320)
Browse files Browse the repository at this point in the history
* dq_all_val_in_nutrition_are_identical

* improvements

* make lint

* fix typo

* manually change test result

* manually change test result
  • Loading branch information
benbenben2 authored Dec 5, 2023
1 parent 5293f10 commit f18bf44
Show file tree
Hide file tree
Showing 4 changed files with 242 additions and 158 deletions.
76 changes: 36 additions & 40 deletions lib/ProductOpener/DataQualityFood.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1046,14 +1046,11 @@ sub check_nutrition_data ($product_ref) {

if (defined $product_ref->{nutriments}) {

my $nid_n = 0;
my $nid_zero = 0;
my $nid_non_zero = 0;

my $total = 0;
# variables to check if there are 3 or more duplicates in nutriments
my @major_nutriments_values = ();
my %nutriments_values_occurences = ();
my %nutriments_values = ();

if ( (defined $product_ref->{nutriments}{"energy-kcal_value"})
and (defined $product_ref->{nutriments}{"energy-kj_value"}))
Expand Down Expand Up @@ -1114,50 +1111,37 @@ sub check_nutrition_data ($product_ref) {

push @{$product_ref->{data_quality_errors_tags}}, "en:nutrition-value-over-1000-$nid2";
}
# fruits vegetables estimate is a computed value, it should not count for empty / non-empty values
if ($nid !~ /fruits-vegetables-nuts-estimate-from-ingredients/) {
if ($product_ref->{nutriments}{$nid} == 0) {
$nid_zero++;
}
else {
$nid_non_zero++;
}
}
# negative value in nutrition table, exclude key containing "nutrition-score" as they can be negative

if (($product_ref->{nutriments}{$nid} < 0) and (index($nid, "nutrition-score") == -1)) {
push @{$product_ref->{data_quality_errors_tags}}, "en:nutrition-value-negative-$nid2";
}
}

$nid_n++;

if ( (defined $product_ref->{nutriments}{$nid . "_100g"})
and (($nid eq 'fat') or ($nid eq 'carbohydrates') or ($nid eq 'proteins') or ($nid eq 'salt')))
{
$total += $product_ref->{nutriments}{$nid . "_100g"};
}

# variables to check if there are 3 or more duplicates in nutriments
if (
(
($nid eq 'fat_100g')
or ($nid eq 'saturated-fat_100g')
or ($nid eq 'carbohydrates_100g')
or ($nid eq 'sugars_100g')
or ($nid eq 'fiber_100g')
or ($nid eq 'proteins_100g')
or ($nid eq 'salt_100g')
or ($nid eq 'sodium_100g')
)
and ($product_ref->{nutriments}{$nid} > 1)
)
# variables to check if there are many duplicates in nutriments
if ( ($nid eq 'energy-kj_100g')
or ($nid eq 'energy-kcal_100g')
or ($nid eq 'fat_100g')
or ($nid eq 'saturated-fat_100g')
or ($nid eq 'carbohydrates_100g')
or ($nid eq 'sugars_100g')
or ($nid eq 'fiber_100g')
or ($nid eq 'proteins_100g')
or ($nid eq 'salt_100g')
or ($nid eq 'sodium_100g'))
{
push(@major_nutriments_values, $product_ref->{nutriments}{$nid});
$nutriments_values{$nid} = $product_ref->{nutriments}{$nid};
}

}

# create a hash key: nutriment value, value: number of occurence
# build a hash mapping each nutriment value to its number of occurrences
foreach my $nutriment_value (@major_nutriments_values) {
if (exists($nutriments_values_occurences{$nutriment_value})) {
$nutriments_values_occurences{$nutriment_value}++;
Expand All @@ -1166,13 +1150,29 @@ sub check_nutrition_data ($product_ref) {
$nutriments_values_occurences{$nutriment_value} = 1;
}
}
# raise warning if there are 3 or more duplicates in nutriments
foreach my $keys (keys %nutriments_values_occurences) {
if ($nutriments_values_occurences{$keys} > 2) {
push @{$product_ref->{data_quality_warnings_tags}}, "en:nutrition-3-or-more-values-are-identical";
last;
# track the highest number of occurrences reached by any single value
my $nutriments_values_occurences_max_value = -1;
# raise warning if the same value, greater than 1, appears 3 or more times among the nutriments
foreach my $key (keys %nutriments_values_occurences) {
if (($nutriments_values_occurences{$key} > 2) and ($key > 1)) {
add_tag($product_ref, "data_quality_warnings", "en:nutrition-3-or-more-values-are-identical");
}
if ($nutriments_values_occurences{$key} > $nutriments_values_occurences_max_value) {
$nutriments_values_occurences_max_value = $nutriments_values_occurences{$key};
}
}
# raise error if
# all values are identical
# OR
# all values but one are identical while salt and sodium differ
# (salt and sodium can each be calculated automatically from the other, so one of them may legitimately be the odd value out)
if (
($nutriments_values_occurences_max_value == scalar @major_nutriments_values)
or ( ($nutriments_values_occurences_max_value >= scalar @major_nutriments_values - 1)
and ($nutriments_values{'salt_100g'} != $nutriments_values{'sodium_100g'}))
)
{
push @{$product_ref->{data_quality_errors_tags}}, "en:nutrition-values-are-all-identical";
}

if ($total > 105) {
push @{$product_ref->{data_quality_errors_tags}}, "en:nutrition-value-total-over-105";
Expand All @@ -1187,10 +1187,6 @@ sub check_nutrition_data ($product_ref) {
push @{$product_ref->{data_quality_errors_tags}}, "en:nutrition-value-over-3800-energy";
}

if (($nid_non_zero == 0) and ($nid_zero > 0) and ($nid_zero == $nid_n)) {
push @{$product_ref->{data_quality_errors_tags}}, "en:all-nutrition-values-are-set-to-0";
}

if (
(defined $product_ref->{nutriments}{"carbohydrates_100g"})
and (
Expand Down
Loading

0 comments on commit f18bf44

Please sign in to comment.