Skip to content

Commit

Permalink
fix: fix_avoid_eiweiss_false_positive_for_allergens (#9317)
Browse files Browse the repository at this point in the history
fix_avoid_eiweiss_false_positive_for_allergens
  • Loading branch information
benbenben2 authored Nov 15, 2023
1 parent 82138ca commit 1aacb01
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 3 deletions.
4 changes: 4 additions & 0 deletions lib/ProductOpener/Ingredients.pm
Original file line number Diff line number Diff line change
Expand Up @@ -6689,6 +6689,10 @@ sub detect_allergens_from_text ($product_ref) {
$text =~ s/\b___([^,;_\(\)\[\]]+?)___\b/replace_allergen($language,$product_ref,$1,$`)/iesg;
$text =~ s/\b__([^,;_\(\)\[\]]+?)__\b/replace_allergen($language,$product_ref,$1,$`)/iesg;
$text =~ s/\b_([^,;_\(\)\[\]]+?)_\b/replace_allergen($language,$product_ref,$1,$`)/iesg;
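# _Weizen_eiweiß is not caught by the previous regex because of its trailing \b (word boundary):
# the closing "_" and the letter that follows it are both word characters, so no boundary exists there.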
if ($language eq 'de') {
$text =~ s/\b_([^,;_\(\)\[\]]+?)_/replace_allergen($language,$product_ref,$1,$`)/iesg;
}

# allergens in all caps, with other ingredients not in all caps

Expand Down
6 changes: 3 additions & 3 deletions taxonomies/ingredients.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22150,7 +22150,7 @@ cs:pšenice
cv:тулă
cy:gwenith
da:hvede
de:weizen
de:weizen, weizen art
dv:ގޮދަން
el:σιτάρι
es:trigo
Expand Down Expand Up @@ -30941,7 +30941,7 @@ en:wheat protein
bg:пшеничен протеин
ca:proteïna de blat
cs:pšeničná bílkovina, pšeničná bilkovina
de:Weizenprotein, Weizeneiweiß, Weizeneiweiss
de:Weizenprotein, Weizeneiweiß, Weizeneiweiss, Weizen eiweiß
es:Proteina de trigo
fi:vehnäproteiini
fr:protéine de blé, protéines de blé, protéines de froment
Expand Down Expand Up @@ -87042,7 +87042,7 @@ bn:সয়া সস
ca:salsa de soia
cs:sójová omáčka
da:sojasovs, sojasauce
de:Sojasauce, Sojasoße, Sojasosse, Soja Sosse, Sojasoßen, Sojasossen, Sojasaucen
de:Sojasauce, Sojasoße, Sojasosse, Soja Sosse, Sojasoßen, Sojasossen, Sojasaucen, Soja soße
el:Σάλτσα σόγιας
eo:sojsaŭco
es:salsa de soya, salsa de soja,shoyu
Expand Down
15 changes: 15 additions & 0 deletions tests/unit/allergens.t
Original file line number Diff line number Diff line change
Expand Up @@ -518,4 +518,19 @@ detect_allergens_from_text($product_ref);

is($product_ref->{ingredients_text_with_allergens_en}, "Whole Grain Oat Flakes (65.0%)");

# German and underscores, see issue #8386
# In this ingredient list only the allergen part of each compound word is
# wrapped in underscores (e.g. _Weizen_eiweiß, _Soja_soße), so the closing
# underscore is directly followed by more letters of the same word.
$product_ref = {
	lc => "de",
	lang => "de",
	ingredients_text_de =>
		"Seitan 65% (_Weizen_eiweiß, Wasser), Rapsöl, Kidneybohnen, _Dinkel_vollkornmehl (_Weizen_art), Apfelessig, Gewürze, Tomatenmark, _Soja_soße (Wasser, _Soja_bohnen, Salz, _Weizen_mehl), Kartoffelstärke, Salz."
};

compute_languages($product_ref);
detect_allergens_from_text($product_ref);

# The detected tags are dumped only when the assertion fails, so passing runs
# stay quiet (no unconditional debug output on STDERR).
is_deeply(
	$product_ref->{allergens_tags},
	['en:gluten', 'en:soybeans',],
	"German compound words with underscored allergens (issue #8386)"
) || diag explain $product_ref->{allergens_tags};

done_testing();

0 comments on commit 1aacb01

Please sign in to comment.