Skip to content

Commit

Permalink
feat: Taxonomy suggestions API v3 for packaging shapes and materials (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
stephanegigandet authored Jan 25, 2023
1 parent 5dce3c0 commit 43c74d6
Show file tree
Hide file tree
Showing 63 changed files with 1,904 additions and 274 deletions.
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ RUN \
done && \
chown www-data:www-data -R /mnt/podata && \
# Create symlinks of data files that are actually conf data in /mnt/podata (because we currently mix data and conf data)
for path in ecoscore emb_codes forest-footprint ingredients packager-codes po taxonomies templates; do \
for path in data-default ecoscore emb_codes forest-footprint ingredients packager-codes po taxonomies templates; do \
ln -sf /opt/product-opener/${path} /mnt/podata/${path}; \
done && \
# Create some necessary files to ensure permissions in volumes
Expand Down
2 changes: 1 addition & 1 deletion cgi/product_multilingual.pl
Original file line number Diff line number Diff line change
Expand Up @@ -684,7 +684,7 @@ ($product_ref, $field, $language)
if (defined $tags_fields{$fieldtype}) {
$class = "tagify-me";
if ((defined $taxonomy_fields{$fieldtype}) or ($fieldtype eq 'emb_codes')) {
$autocomplete = "$formatted_subdomain/cgi/suggest.pl?tagtype=$fieldtype&";
$autocomplete = "$formatted_subdomain/api/v3/taxonomy_suggestions?tagtype=$fieldtype";
}
}

Expand Down
184 changes: 26 additions & 158 deletions cgi/suggest.pl
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -26,178 +26,46 @@
use CGI qw/:cgi :form escapeHTML/;

use ProductOpener::Config qw/:all/;
use ProductOpener::Store qw/:all/;
use ProductOpener::Index qw/:all/;
use ProductOpener::Display qw/:all/;
use ProductOpener::Users qw/:all/;
use ProductOpener::Products qw/:all/;
use ProductOpener::Food qw/:all/;
use ProductOpener::Tags qw/:all/;
use ProductOpener::TaxonomySuggestions qw/:all/;
use ProductOpener::Lang qw/:all/;
use ProductOpener::PackagerCodes qw/:all/;
use ProductOpener::HTTP qw/:all/;

use CGI qw/:cgi :form escapeHTML/;
use URI::Escape::XS;
use Storable qw/dclone/;
use Encode;
use JSON::PP;
use List::Util qw/min/;
use Encode;

my $request_ref = ProductOpener::Display::init_request();

=head1 CGI script to auto-complete entries for tags
=head2 Request parameters
=head3 tagtype - the type of tag
=head3 string - string to search
=head3 term - term to search
If string and term are passed together, they are concatenated together as separate words
=head3 limit - max number of suggestions
Warning, we are currently doing a brute force search, so avoid setting it too high
=cut

my $tagtype = single_param('tagtype');
my $string = decode utf8 => single_param('string');
# searched term
my $term = decode utf8 => single_param('term');

# search language code
my $search_lc = $lc;
# superseded by request parameter
if (defined single_param('lc')) {
$search_lc = single_param('lc');
}

my $original_lc = $search_lc;

# if search begins with a language code, use it for search
if ($term =~ /^(\w\w):/) {
$search_lc = $1;
$term = $';
}

# max results
my $limit = 25;
# superseded by request parameter
if (defined single_param('limit')) {
# we put a hard limit however
$limit = min(int(single_param('limit')), 400);
}

my @suggestions = (); # Suggestions starting with the term
my @suggestions_c = (); # Suggestions containing the term
my @suggestions_f = (); # fuzzy suggestions

my $cache_max_age = 0;
my $suggestions_count = 0;

# search for emb codes
if ($tagtype eq 'emb_codes') {
my $stringid = get_string_id_for_lang("no_language", normalize_packager_codes($term));
my @tags = sort keys %packager_codes;
foreach my $canon_tagid (@tags) {
next if $canon_tagid !~ /^$stringid/;
push @suggestions, normalize_packager_codes($canon_tagid);
last if ++$suggestions_count >= $limit;
}
# add cache to request
$cache_max_age = 3600;
}
else {
# search for term in a taxonomy

# normalize string and term
my $stringid = get_string_id_for_lang($search_lc, $string) . "-" . get_string_id_for_lang($search_lc, $term);
# remove any leading or trailing "-"
$stringid =~ s/^-//;
$stringid =~ s/^-$//;
# fuzzy match: the words must appear in order, but other text may appear between them
my $fuzzystringid = join(".*", split("-", $stringid));
# all tags can be retrieved from the %translations_to hash
my @tags = sort keys %{$translations_to{$tagtype}};
foreach my $canon_tagid (@tags) {
# just_synonyms are not real entries
next if defined $just_synonyms{$tagtype}{$canon_tagid};

my $tag; # this is the content string
my $tagid; # this is the tag

# search if the tag exists in target language
if (defined $translations_to{$tagtype}{$canon_tagid}{$search_lc}) {

$tag = $translations_to{$tagtype}{$canon_tagid}{$search_lc};
# TODO: explain why $tagid can be different from $canon_tagid
$tagid = get_string_id_for_lang($search_lc, $tag);

# add language prefix if we are not searching current interface language
if (not($search_lc eq $original_lc)) {
$tag = $search_lc . ":" . $tag;
}
}
# also search for special language code "xx" which is universal
elsif (defined $translations_to{$tagtype}{$canon_tagid}{xx}) {
$tag = $translations_to{$tagtype}{$canon_tagid}{xx};
$tagid = get_string_id_for_lang("xx", $tag);
}

if (defined $tag) {
# matching at start, best matches
if ($tagid =~ /^$stringid/) {
push @suggestions, $tag;
# only matches at start are considered
$suggestions_count++;
}
# matching inside
elsif ($tagid =~ /$stringid/) {
push @suggestions_c, $tag;
}
# fuzzy match
elsif ($tagid =~ /$fuzzystringid/) {
push @suggestions_f, $tag;
}
# end as soon as we got enough
last if $suggestions_count >= $limit;
}
}
# add cache to request
$cache_max_age = 3600;
}
# sort best suggestions
@suggestions = sort @suggestions;
# suggestions containing term
my $contains_to_add = min($limit - (scalar @suggestions), scalar @suggestions_c) - 1;
if ($contains_to_add >= 0) {
push @suggestions, @suggestions_c[0 .. $contains_to_add];
}
# Suggestions as fuzzy match
my $fuzzy_to_add = min($limit - (scalar @suggestions), scalar @suggestions_f) - 1;
if ($fuzzy_to_add >= 0) {
push @suggestions, @suggestions_f[0 .. $fuzzy_to_add];
}
my $search_lc = $request_ref->{lc};

# We need a taxonomy name to provide suggestions for
my $tagtype = request_param($request_ref, "tagtype");

# The API accepts a string input in the "string" field or "term" field.
# - term is used by the jquery Autocomplete widget: https://api.jqueryui.com/autocomplete/
# Use "string" only if both are present.
my $string = decode("utf8", (request_param($request_ref, 'string') || request_param($request_ref, 'term')));

# /cgi/suggest.pl supports only limited context (use /api/v3/taxonomy_suggestions to use richer context)
my $context_ref = {country => $request_ref->{country},};

# Options define how many suggestions should be returned, in which format etc.
my $options_ref = {limit => request_param($request_ref, 'limit')};

my @suggestions = get_taxonomy_suggestions($tagtype, $search_lc, $string, $context_ref, $options_ref);

my $data = encode_json(\@suggestions);

# send response
write_cors_headers();

if ($cache_max_age) {
print header(
-type => 'application/json',
-charset => 'utf-8',
-cache_control => 'public, max-age=' . $cache_max_age,
);
}
else {
print header(
-type => 'application/json',
-charset => 'utf-8',
);
}
print header(
-type => 'application/json',
-charset => 'utf-8',
-cache_control => 'public, max-age=' . 60, # 1 minute cache
);

print $data;

Large diffs are not rendered by default.

77 changes: 77 additions & 0 deletions docs/reference/api-v3.yml
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,83 @@ paths:
- an object sent in the packagings field will replace any pre-existing data.
- an object sent in the field suffixed with _add (e.g. packagings_add) will be merged with any pre-existing data.
parameters: []
/api/v3/taxonomy_suggestions:
parameters: []
get:
summary: Get taxonomy entries suggestions
tags: []
responses:
'200':
description: OK
content:
application/json:
schema:
allOf:
- $ref: ./responses/response-status/response_status.yaml
- type: object
properties:
suggestions:
type: array
description: Sorted array of suggestion strings in the language requested in the "lc" field.
items:
type: string
operationId: get-api-v3-taxonomy_suggestions-taxonomy
description: |-
Open Food Facts uses multilingual [taxonomies](https://wiki.openfoodfacts.org/Global_taxonomies) to normalize entries for categories, labels, ingredients, packaging shapes / materials / recycling instructions and many more fields.
This API returns taxonomy entries suggestions that can be used in product edit forms, search forms etc. (for instance in autocomplete dropdowns using libraries like Tagify or select2 on the Web).
Suggestions filtering:
The string parameter restricts the results to suggestions that contain a specific string (useful for autocomplete suggestions).
Suggestions ordering:
- For packaging shapes and materials, suggestions are ordered first by the number of packaging components they appear in (restricted by country, categories and shape (for materials) if they are passed as parameters).
- For all other taxonomies, results are ordered alphabetically.
If a string is passed, an additional sort is done to put first the suggestions that start with the string, followed by suggestions with a word that starts with the string, and then suggestions that contain the string anywhere.
parameters:
- $ref: ./api.yml#/components/parameters/tagtype
- schema:
type: string
example: en
in: query
name: lc
description: 2 letter code of the language used for suggestions and for matching the input string
- schema:
type: string
example: pe
in: query
name: string
description: 'Optional string used to filter suggestions (useful for autocomplete). If passed, suggestions starting with the string will be returned first, followed by suggestions matching the string at the beginning of a word, and suggestions matching the string inside a word.'
- schema:
type: string
in: query
name: cc
description: '2 letter country code, used to return popular packaging shapes and materials for products sold in the country'
- schema:
type: string
example: yogurts
in: query
name: categories
description: 'Comma separated list of categories tags (e.g. "en:fats,en:unsalted-butters") or category names in the language indicated by the "lc" field (e.g. "graisses, beurres salés" in French)'
- schema:
type: string
example: bottle
in: query
name: shape
description: 'Shape of packaging component (tag identified in the packaging_shapes taxonomy, or plain text tag name in the language indicated by the "lc" field)'
- schema:
type: string
in: query
name: limit
description: 'Maximum number of suggestions. Default is 25, max is 400.'
- schema:
type: string
in: query
name: term
description: Alias for the "string" parameter provided for backward compatibility. "string" takes precedence.
components:
schemas: null
parameters: null
Expand Down
1 change: 0 additions & 1 deletion docs/reference/schemas/packagings/packagings-write.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,3 @@ description: |-
examples: []
items:
$ref: ./packaging_component-write.yaml
readOnly: true
45 changes: 45 additions & 0 deletions docs/reference/schemas/taxonomies/tagtype.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
title: tagtype
x-stoplight:
id: cyaecslbj8x2i
type: string
enum:
- additives_classes
- additives
- allergens
- amino_acids
- categories
- countries
- data_quality
- data_quality
- data_quality
- data_quality
- data_quality
- data_quality
- data_quality
- food_groups
- improvements
- ingredients_analysis
- ingredients_processing
- ingredients
- labels
- languages
- minerals
- misc
- nova_groups
- nucleotides
- nutrient_levels
- nutrients
- origins
- other_nutritional_substances
- packaging_materials
- packaging_recycling
- packaging
- packaging_shapes
- periods_after_opening
- preservation
- states
- test
- allergens
- vitamins
description: 'Identifier of a taxonomy. See https://wiki.openfoodfacts.org/Global_taxonomies and https://github.com/openfoodfacts/openfoodfacts-server/tree/main/taxonomies'
examples: []
10 changes: 5 additions & 5 deletions html/js/product-multilingual.js
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ function initializeTagifyInput(el) {
let abortController;
input.on("input", function(event) {
const value = event.detail.value;
input.settings.whitelist = []; // reset the whitelist
input.whitelist = null; // reset the whitelist

if (el.dataset.autocomplete && el.dataset.autocomplete !== "") {
// https://developer.mozilla.org/en-US/docs/Web/API/AbortController/abort
Expand All @@ -556,13 +556,13 @@ function initializeTagifyInput(el) {

abortController = new AbortController();

fetch(el.dataset.autocomplete + "term=" + value, {
fetch(el.dataset.autocomplete + "&string=" + value, {
signal: abortController.signal
}).
then((RES) => RES.json()).
then(function(whitelist) {
input.settings.whitelist = whitelist;
input.dropdown.show.call(input, value); // render the suggestions dropdown
then(function(json) {
input.whitelist = json.suggestions;
input.dropdown.show(value); // render the suggestions dropdown
});
}
});
Expand Down
Loading

0 comments on commit 43c74d6

Please sign in to comment.