From 5523a152064449698bc1e03d1adc62ec103538e6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Gigandet?= Date: Mon, 13 Nov 2023 10:25:14 +0100 Subject: [PATCH] fix: quantities starting with a dot .33L (#9284) --- lib/ProductOpener/Numbers.pm | 18 ++++++++++++++++++ lib/ProductOpener/Units.pm | 6 +++--- tests/unit/units.t | 11 ++++++++++- 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/lib/ProductOpener/Numbers.pm b/lib/ProductOpener/Numbers.pm index 8b2ea79624d3b..c30d6ce211605 100644 --- a/lib/ProductOpener/Numbers.pm +++ b/lib/ProductOpener/Numbers.pm @@ -40,6 +40,7 @@ BEGIN { &remove_insignificant_digits &convert_string_to_number + $number_regexp ); # symbols to export on request %EXPORT_TAGS = (all => [@EXPORT_OK]); @@ -47,6 +48,23 @@ BEGIN { use vars @EXPORT_OK; +=head1 VARIABLES + +=head2 $number_regexp + +Regular expression to match something that looks like a number: +32 +32.5 +0.5 +.5 +32,5 + +=cut + +# dot followed by digits, +# or digits, optionally followed by a dot or a comma and one or more digits +$number_regexp = '\.\d+|\d+(?:(?:\,|\.)\d+)?'; + =head1 FUNCTIONS =head2 remove_insignificant_digits($) diff --git a/lib/ProductOpener/Units.pm b/lib/ProductOpener/Units.pm index 42a580876bbc6..fb592db9f3b5c 100644 --- a/lib/ProductOpener/Units.pm +++ b/lib/ProductOpener/Units.pm @@ -245,7 +245,7 @@ sub normalize_quantity ($quantity) { # 10 unités, 170 g # 4 bouteilles en verre de 20cl if ($quantity - =~ /(?<number>\d+)(\s(\p{Letter}| )+)?(\s)?( de | of |x|\*)(\s)?(?<quantity>(\d+)(\.|,)?(\d+)?)(\s)?(?<unit>$units_regexp)\b/i + =~ /(?<number>\d+)(\s(\p{Letter}| )+)?(\s)?( de | of |x|\*)(\s)?(?<quantity>$number_regexp)(\s)?(?<unit>$units_regexp)\b/i ) { my $m = $+{number}; @@ -254,7 +254,7 @@ sub normalize_quantity ($quantity) { $q = convert_string_to_number($q); $q = unit_to_g($q * $m, $u); } - elsif ($quantity =~ /(?<quantity>(\d+)(\.|,)?(\d+)?)(\s)?(?<unit>$units_regexp)\s*\b/i) { + elsif ($quantity =~ /(?<quantity>$number_regexp)(\s)?(?<unit>$units_regexp)\s*\b/i) { $q = lc($+{quantity}); $u = $+{unit}; 
$q = convert_string_to_number($q); @@ -276,7 +276,7 @@ sub normalize_serving_size ($serving) { # Regex captures any ( )? group, but leaves allowances for a preceding # token to allow for patterns like "One bag (32g)", "1 small bottle (180ml)" etc - if ($serving =~ /^(.*[ \(])?(?<quantity>(\d+)(\.|,)?(\d+)?)( )?(?<unit>$units_regexp)\b/i) { + if ($serving =~ /^(.*[ \(])?(?<quantity>$number_regexp)( )?(?<unit>$units_regexp)\b/i) { my $q = $+{quantity}; my $u = $+{unit}; $q = convert_string_to_number($q); diff --git a/tests/unit/units.t b/tests/unit/units.t index e65c057a24363..bc835523f6df0 100644 --- a/tests/unit/units.t +++ b/tests/unit/units.t @@ -141,6 +141,15 @@ is(normalize_quantity("2 kgr"), 2000); is(normalize_quantity("2 kilogramme"), 2000); is(normalize_quantity("2 kilogrammes"), 2000); +# Decimal quantities written without a leading 0 (e.g. .33L) +is(normalize_quantity(".33L"), 330); +is(normalize_quantity(".33 l"), 330); +is(normalize_serving_size(".33L"), 330); +is(normalize_serving_size(".33 l"), 330); +is(normalize_serving_size("5 bottles (.33L)"), 330); +is(normalize_serving_size("5 bottles .33L"), 330); +is(normalize_serving_size("5 bottles2.33L"), undef); # Broken string, missing word separator before number + my @serving_sizes = ( ["100g", "100"], ["250 g", "250"], @@ -154,7 +163,7 @@ my @serving_sizes = ( ); foreach my $test_ref (@serving_sizes) { - is(normalize_serving_size($test_ref->[0]), $test_ref->[1]); + is(normalize_serving_size($test_ref->[0]), $test_ref->[1]) or diag explain $test_ref; } done_testing();