From 1aacb01385f9e6a5ec5fce9a51abce9d7cbfb69e Mon Sep 17 00:00:00 2001 From: benbenben2 <110821832+benbenben2@users.noreply.github.com> Date: Wed, 15 Nov 2023 09:39:04 +0100 Subject: [PATCH] fix: fix_avoid_eiweiss_false_positive_for_allergens (#9317) fix_avoid_eiweiss_false_positive_for_allergens --- lib/ProductOpener/Ingredients.pm | 4 ++++ taxonomies/ingredients.txt | 6 +++--- tests/unit/allergens.t | 15 +++++++++++++++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/lib/ProductOpener/Ingredients.pm b/lib/ProductOpener/Ingredients.pm index 3f93f6213e6d0..38b67ad721fa7 100644 --- a/lib/ProductOpener/Ingredients.pm +++ b/lib/ProductOpener/Ingredients.pm @@ -6689,6 +6689,10 @@ sub detect_allergens_from_text ($product_ref) { $text =~ s/\b___([^,;_\(\)\[\]]+?)___\b/replace_allergen($language,$product_ref,$1,$`)/iesg; $text =~ s/\b__([^,;_\(\)\[\]]+?)__\b/replace_allergen($language,$product_ref,$1,$`)/iesg; $text =~ s/\b_([^,;_\(\)\[\]]+?)_\b/replace_allergen($language,$product_ref,$1,$`)/iesg; + # _Weizen_eiweiß is not caught by the previous regex because of its trailing \b (word boundary): the closing underscore is directly followed by a letter, so there is no boundary after it. 
+ if ($language eq 'de') { + $text =~ s/\b_([^,;_\(\)\[\]]+?)_/replace_allergen($language,$product_ref,$1,$`)/iesg; + } # allergens in all caps, with other ingredients not in all caps diff --git a/taxonomies/ingredients.txt b/taxonomies/ingredients.txt index e6f4aec69c7aa..2216f7fe6b460 100644 --- a/taxonomies/ingredients.txt +++ b/taxonomies/ingredients.txt @@ -22150,7 +22150,7 @@ cs:pšenice cv:тулă cy:gwenith da:hvede -de:weizen +de:weizen, weizen art dv:ގޮދަން el:σιτάρι es:trigo @@ -30941,7 +30941,7 @@ en:wheat protein bg:пшеничен протеин ca:proteïna de blat cs:pšeničná bílkovina, pšeničná bilkovina -de:Weizenprotein, Weizeneiweiß, Weizeneiweiss +de:Weizenprotein, Weizeneiweiß, Weizeneiweiss, Weizen eiweiß es:Proteina de trigo fi:vehnäproteiini fr:protéine de blé, protéines de blé, protéines de froment @@ -87042,7 +87042,7 @@ bn:সয়া সস ca:salsa de soia cs:sójová omáčka da:sojasovs, sojasauce -de:Sojasauce, Sojasoße, Sojasosse, Soja Sosse, Sojasoßen, Sojasossen, Sojasaucen +de:Sojasauce, Sojasoße, Sojasosse, Soja Sosse, Sojasoßen, Sojasossen, Sojasaucen, Soja soße el:Σάλτσα σόγιας eo:sojsaŭco es:salsa de soya, salsa de soja,shoyu diff --git a/tests/unit/allergens.t b/tests/unit/allergens.t index 50ed520670f77..676570fc2fba3 100644 --- a/tests/unit/allergens.t +++ b/tests/unit/allergens.t @@ -518,4 +518,19 @@ detect_allergens_from_text($product_ref); is($product_ref->{ingredients_text_with_allergens_en}, "Whole Grain Oat Flakes (65.0%)"); +# German and underscores, see issue #8386 +$product_ref = { + lc => "de", + lang => "de", + ingredients_text_de => + "Seitan 65% (_Weizen_eiweiß, Wasser), Rapsöl, Kidneybohnen, _Dinkel_vollkornmehl (_Weizen_art), Apfelessig, Gewürze, Tomatenmark, _Soja_soße (Wasser, _Soja_bohnen, Salz, _Weizen_mehl), Kartoffelstärke, Salz." 
+}; + +compute_languages($product_ref); +detect_allergens_from_text($product_ref); + +diag explain $product_ref->{allergens_tags}; + +is_deeply($product_ref->{allergens_tags}, ['en:gluten', 'en:soybeans',]) || diag explain $product_ref->{allergens_tags}; + done_testing();