Skip to content

Commit

Permalink
feat: suggest fuzzy + limit as a param (#6887)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexgarel authored Jun 13, 2022
1 parent 7354ebf commit a6622f8
Show file tree
Hide file tree
Showing 2 changed files with 105 additions and 21 deletions.
109 changes: 88 additions & 21 deletions cgi/suggest.pl
Original file line number Diff line number Diff line change
Expand Up @@ -42,84 +42,151 @@
use Storable qw/dclone/;
use Encode;
use JSON::PP;
use List::Util qw/min/;

ProductOpener::Display::init();

=head1 CGI script to auto-complete entries for tags
=head2 Request parameters
=head3 tagtype - the type of tag
=head3 string - string to search
=head3 term - term to search
If string and term are passed together, they are concatenated as separate words
=head3 limit - max number of suggestions
Warning, we are currently doing a brute force search, so avoid setting it too high
=cut

my $tagtype = param('tagtype');
my $string = decode utf8=>param('string');
# searched term
my $term = decode utf8=>param('term');

# search language code
my $search_lc = $lc;

# superseded by request parameter
if (defined param('lc')) {
$search_lc = param('lc');
}

my $original_lc = $search_lc;

# If the searched term begins with a two-character language code followed
# by ":" (for instance "fr:"), search in that language and strip the prefix
# from the term.
# The remainder is captured explicitly instead of being read from $', because
# any use of $' can slow down every regex match in the program on older perls.
# The defined check avoids an "uninitialized value" warning when no term
# parameter was passed.
if (defined $term and $term =~ /^(\w\w):(.*)\z/s) {
    $search_lc = $1;
    $term = $2;
}

# max results
my $limit = 25;
# superseded by request parameter
if (defined param('limit')) {
# we put a hard limit however
$limit = min(int(param('limit')), 400);
}


my @suggestions = (); # Suggestions starting with the term
my @suggestions_c = (); # Suggestions containing the term
my @suggestions_f = (); # fuzzy suggestions

my $cache_max_age = 0;
my $limit = 25;
my $i = 0;
my $suggestions_count = 0;


# search for emb codes
if ($tagtype eq 'emb_codes') {
my $stringid = get_string_id_for_lang("no_language", normalize_packager_codes($term));
my @tags = sort keys %packager_codes;
foreach my $canon_tagid (@tags) {
next if $canon_tagid !~ /^$stringid/;
push @suggestions, normalize_packager_codes($canon_tagid);
last if ++$i >= $limit;
last if ++$suggestions_count >= $limit;
}
# add cache to request
$cache_max_age = 3600;
}
else {
my $stringid = get_string_id_for_lang($search_lc, $string) . get_string_id_for_lang($search_lc, $term);
# search for term in a taxonomy

# normalize string and term, joined as two "-" separated words
my $stringid = get_string_id_for_lang($search_lc, $string) . "-" . get_string_id_for_lang($search_lc, $term);
# remove a possible leading or trailing "-", which appears when one of the
# two parts normalizes to an empty string (e.g. only string or only term given);
# a dangling "-" would otherwise have to be matched literally in the tag id
$stringid =~ s/^-//;
$stringid =~ s/-$//;
# fuzzy match: every word must appear, in order, with anything in between
my $fuzzystringid = join(".*", split("-", $stringid));
# all tags can be retrieved from the $translations_to hash
my @tags = sort keys %{$translations_to{$tagtype}} ;
foreach my $canon_tagid (@tags) {

# just_synonyms are not real entries
next if defined $just_synonyms{$tagtype}{$canon_tagid};

my $tag;
my $tagid;


my $tag; # this is the content string
my $tagid; # this is the tag

# search if the tag exists in target language
if (defined $translations_to{$tagtype}{$canon_tagid}{$search_lc}) {

$tag = $translations_to{$tagtype}{$canon_tagid}{$search_lc};
# TODO: explain why $tagid can be different from $canon_tagid
$tagid = get_string_id_for_lang($search_lc, $tag);


# add language prefix if we are not searching current interface language
if (not ($search_lc eq $original_lc)) {
$tag = $search_lc . ":" . $tag;
}
}
# also search for special language code "xx" which is universal
elsif (defined $translations_to{$tagtype}{$canon_tagid}{xx}) {
$tag = $translations_to{$tagtype}{$canon_tagid}{xx};
$tagid = get_string_id_for_lang("xx", $tag);
}

if (defined $tag) {

next if $tagid !~ /$stringid/;

if ($tag =~ /^$stringid/i) {
# matching at start, best matches
if ($tagid =~ /^$stringid/) {
push @suggestions, $tag;
# only matches at start are considered
$suggestions_count++;
}
else {
# matching inside
elsif ($tagid =~ /$stringid/) {
push @suggestions_c, $tag;
}
last if ++$i >= $limit;
# fuzzy match
elsif ($tagid =~ /$fuzzystringid/) {
push @suggestions_f, $tag;
}
# end as soon as we got enough
last if $suggestions_count >= $limit;
}
}
# add cache to request
$cache_max_age = 3600;
}
push @suggestions, @suggestions_c;
my $data = encode_json(\@suggestions);
# sort best suggestions
@suggestions = sort @suggestions;
# suggestions containing term
my $contains_to_add = min($limit - (scalar @suggestions), scalar @suggestions_c) - 1;
if ($contains_to_add >= 0) {
push @suggestions, @suggestions_c[0..$contains_to_add];
}
# Suggestions as fuzzy match
my $fuzzy_to_add = min($limit - (scalar @suggestions), scalar @suggestions_f) - 1;
if ($fuzzy_to_add >= 0) {
push @suggestions, @suggestions_f[0..$fuzzy_to_add];
}
my $data = encode_json(\@suggestions);

# send response
print header(
-type => 'application/json',
-charset => 'utf-8',
Expand Down
17 changes: 17 additions & 0 deletions lib/ProductOpener/Tags.pm
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,21 @@ sub remove_plurals($$) {



=head2 build_tags_taxonomy( $tagtype, $file, $publish )
Build taxonomy from the taxonomy file
=head3 Arguments
=head4 str $tagtype - the tagtype
Like "categories", "ingredients"
=head4 $file - name of the file to read in the taxonomies folder
=head4 $publish - if 1, store the result in sto
=cut
sub build_tags_taxonomy($$$) {

my $tagtype = shift;
Expand All @@ -807,6 +822,8 @@ sub build_tags_taxonomy($$$) {
$root_entries{$tagtype} = {};

$just_tags{$tagtype} = {};
# synonyms that are not real entries, but only enrich existing tags
# they correspond to synonyms: entries
$just_synonyms{$tagtype} = {};
$properties{$tagtype} = {};

Expand Down

0 comments on commit a6622f8

Please sign in to comment.