Skip to content

Commit

Permalink
feat: revamped taxonomy sort script (#9818)
Browse files Browse the repository at this point in the history
to really sort everything (within an entry)
  • Loading branch information
alexgarel authored Mar 7, 2024
1 parent 2b2d85f commit fee8d3d
Show file tree
Hide file tree
Showing 5 changed files with 230 additions and 65 deletions.
1 change: 0 additions & 1 deletion .github/labeler.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ Stores:
🧬 Taxonomies:
- taxonomies/**/*
- scripts/taxonomies/**/*
- scripts/sort_languages_in_taxonomy.pl
- cgi/translate_taxonomy.pl

🧬 Taxonomies - Rebuild:
Expand Down
18 changes: 16 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -241,8 +241,10 @@ front_build:


checks: front_build front_lint check_perltidy check_perl_fast check_critic
# TODO: add check_taxonomies when taxonomies ready

lint: lint_perltidy
# TODO: add lint_taxonomies when taxonomies ready

tests: build_lang_test unit_test integration_test

Expand Down Expand Up @@ -321,7 +323,7 @@ bash_test:
# the ls at the end is to avoid removed files.
# the first command is to check we have git (to avoid trying to run this line inside the container on check_perl*)
# We have to finally filter out "." as this will be the output if we have no file
TO_CHECK=$(shell [ -x "`which git 2>/dev/null`" ] && git diff origin/main --name-only | grep '.*\.\(pl\|pm\|t\)$$' | grep -v "scripts/obsolete" | xargs ls -d 2>/dev/null | grep -v "^.$$" )
TO_CHECK := $(shell [ -x "`which git 2>/dev/null`" ] && git diff origin/main --name-only | grep '.*\.\(pl\|pm\|t\)$$' | grep -v "scripts/obsolete" | xargs ls -d 2>/dev/null | grep -v "^.$$" )

check_perl_fast:
@echo "🥫 Checking ${TO_CHECK}"
Expand All @@ -341,7 +343,7 @@ check_perl:

# check with perltidy
# we exclude files that are in .perltidy_excludes
TO_TIDY_CHECK = $(shell echo ${TO_CHECK}| tr " " "\n" | grep -vFf .perltidy_excludes)
TO_TIDY_CHECK := $(shell echo ${TO_CHECK}| tr " " "\n" | grep -vFf .perltidy_excludes)
check_perltidy:
@echo "🥫 Checking with perltidy ${TO_TIDY_CHECK}"
test -z "${TO_TIDY_CHECK}" || ${DOCKER_COMPOSE} run --rm --no-deps backend perltidy --assert-tidy -opath=/tmp/ --standard-error-output ${TO_TIDY_CHECK}
Expand All @@ -358,6 +360,18 @@ check_critic:
@echo "🥫 Checking with perlcritic"
test -z "${TO_CHECK}" || ${DOCKER_COMPOSE} run --rm --no-deps backend perlcritic ${TO_CHECK}

# Taxonomy files modified compared to origin/main: *.txt files located below a
# path containing "taxonomies", excluding the *.result.txt files.
# grep takes a regular expression, not a shell glob, hence ".*" (a bare "*" would
# only repeat the previous character and no taxonomy file would ever be selected).
# As for TO_CHECK, the trailing ls / grep drop removed files and the lone "."
# that is printed when there is no file at all.
TAXONOMIES_TO_CHECK := $(shell [ -x "`which git 2>/dev/null`" ] && git diff origin/main --name-only | grep 'taxonomies.*/.*\.txt$$' | grep -v '\.result\.txt' | xargs ls -d 2>/dev/null | grep -v "^.$$")

# Verify that every entry of the modified taxonomies is sorted (files are not modified)
check_taxonomies:
	@echo "🥫 Checking taxonomies"
	test -z "${TAXONOMIES_TO_CHECK}" || \
	${DOCKER_COMPOSE} run --rm --no-deps backend scripts/taxonomies/sort_each_taxonomy_entry.sh --check ${TAXONOMIES_TO_CHECK}

# Sort in place every entry of the modified taxonomies
lint_taxonomies:
	@echo "🥫 Linting taxonomies"
	test -z "${TAXONOMIES_TO_CHECK}" || \
	${DOCKER_COMPOSE} run --rm --no-deps backend scripts/taxonomies/sort_each_taxonomy_entry.sh ${TAXONOMIES_TO_CHECK}


check_openapi_v2:
docker run --rm \
Expand Down
62 changes: 0 additions & 62 deletions scripts/sort_languages_in_taxonomy.pl

This file was deleted.

171 changes: 171 additions & 0 deletions scripts/taxonomies/sort_each_taxonomy_entry.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
#!/usr/bin/perl -w

# This file is part of Product Opener.
#
# Product Opener
# Copyright (C) 2011-2023 Association Open Food Facts
# Contact: [email protected]
# Address: 21 rue des Iles, 94100 Saint-Maur des Fossés, France
#
# Product Opener is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

use Modern::Perl '2017';
use utf8;

use ProductOpener::Store qw/:all/;
use ProductOpener::Config qw/:all/;

# Command line flags: each one holds the number of times the option appears in @ARGV
my $is_check = scalar(grep {$_ eq "--check"} @ARGV);
my $is_verbose = scalar(grep {$_ eq "-v"} @ARGV);
# becomes true, in check mode, as soon as one entry differs from its sorted form
my $has_changes = 0;

# the three standard streams all carry UTF-8 text
for my $stream (\*STDIN, \*STDOUT, \*STDERR) {
	binmode($stream, ":encoding(UTF-8)");
}

# Comparison function for sort, ordering language codes ("fr") or
# "property:language" keys ("description:fr").
#
# The ($$) prototype is what makes sort pass the two operands in @_.
#
# Keys are compared case insensitively. When both keys have a property prefix
# and the prefixes differ, the prefixes decide. Otherwise languages decide:
# "xx" comes first, then "en", then all others in alphabetical order.
#
# Returns -1, 0 or 1, as cmp does.
sub cmp_on_language ($$) {
	my ($left, $right) = @_;

	# an undefined operand: fall back to a plain string comparison
	unless (defined $left and defined $right) {
		return $left cmp $right;
	}

	$left = lc($left);
	$right = lc($right);

	# split "property:language" keys, keeping only the language in $left / $right
	my ($left_prefix, $right_prefix);
	if (my ($prefix, $language) = $left =~ /^(\w+):(\w+)$/) {
		($left_prefix, $left) = ($prefix, $language);
	}
	if (my ($prefix, $language) = $right =~ /^(\w+):(\w+)$/) {
		($right_prefix, $right) = ($prefix, $language);
	}

	# different properties are ordered on the property name
	if ($left_prefix && $right_prefix && ($left_prefix ne $right_prefix)) {
		return $left_prefix cmp $right_prefix;
	}

	return 0 if ($left eq $right);

	# xx and en take precedence over all other languages, xx being first
	my %rank = (xx => 0, en => 1);
	my $by_rank = ($rank{$left} // 2) <=> ($rank{$right} // 2);
	return $by_rank if $by_rank;

	return $left cmp $right;
}

# Main part of the script: read a taxonomy on STDIN and process it entry by
# entry, entries being separated by blank lines.
# For each entry, lines are put in this order: parents (sorted on the line),
# the entry id line (first synonym line met), other synonym lines, then
# property lines, both sorted with cmp_on_language.
# Comments and unrecognized lines stay attached above the line they precede.
# Without --check the result is printed on STDOUT; with --check nothing is
# rewritten: entries are compared to their sorted form (differences are
# reported with -v) and the exit status is 1 if at least one entry differs.

# read all in memory to take care of last line in a simple way
my @lines = (<STDIN>);

# nothing to normalize on empty input (and $lines[-1] would be undefined)
if (@lines) {
	# the last line may lack its end of line: add it, otherwise it would be
	# glued to the next line if sorting moves it up in its entry
	$lines[-1] .= "\n" unless $lines[-1] =~ /\n\z/;
	# be sure to end with a blank line, so that the last entry gets processed
	push @lines, "\n" unless $lines[-1] =~ /^\s*$/;
}

# structures for one entry
# each collected line is a hash with:
# - line: the line itself
# - previous: the non meaningful lines (eg. comments) found just above it
my @parents = ();    # lines defining parents
my $entry_id_line = undef;    # line defining entry id, we don't want to change its position
# lines defining synonyms: language code => list of lines.
# We keep a list so that lines repeating a language are all kept (in their original order)
my %entries = ();
# lines defining properties: "property:language" => list of lines (same remark)
my %props = ();
my @original_lines = ();
# non meaningful lines above a meaningful line (entry, parent or property)
my @previous_lines = ();
my $line_num = 0;
my $entry_start_line = 1;    # tracking line number of the first line of an entry
foreach my $line (@lines) {
	$line_num += 1;
	push @original_lines, $line;    # collect lines for comparison

	# blank line means we are changing entry, so let's print collected lines
	if ($line =~ /^\s*$/) {
		my @output_lines = ();
		# sort items
		@parents = sort {$a->{line} cmp $b->{line}} @parents;
		my @sorted_entries = sort cmp_on_language (keys %entries);
		my @sorted_props = sort cmp_on_language (keys %props);
		# print parents, line id, synonyms, sorted props
		for my $parent (@parents) {
			push @output_lines, @{$parent->{previous}}, $parent->{line};
		}
		if (defined $entry_id_line) {
			push @output_lines, @{$entry_id_line->{previous}}, $entry_id_line->{line};
		}
		for my $key (@sorted_entries) {
			for my $item (@{$entries{$key}}) {
				push @output_lines, @{$item->{previous}}, $item->{line};
			}
		}
		for my $key (@sorted_props) {
			for my $item (@{$props{$key}}) {
				push @output_lines, @{$item->{previous}}, $item->{line};
			}
		}
		# print remaining previous_lines (if any)
		push @output_lines, @previous_lines;
		# print this blank line
		push @output_lines, $line;
		my $original = join("", @original_lines);
		my $output = join("", @output_lines);
		if ($is_check) {
			# compare with original lines
			if ($original ne $output) {
				$has_changes = 1;
				if ($is_verbose) {
					print "Error: output is not the same as original, line $entry_start_line..$line_num\n";
					print "Original --------------------\n";
					print "$original\n";
					print "Sorted --------------------\n";
					print "$output\n";
				}
			}
		}
		else {
			print "$output";
		}
		# re-init
		$entry_id_line = undef;
		@parents = ();
		%entries = ();
		%props = ();
		@previous_lines = ();
		@original_lines = ();
		# next entry starts on the line after this blank line
		$entry_start_line = $line_num + 1;
	}
	# parents
	elsif ($line =~ /^</) {
		push @parents, {line => $line, previous => [@previous_lines]};
		@previous_lines = ();
	}
	# synonym
	elsif ($line =~ /^(\w+):[^:]*(,.*)*$/) {
		my $lc = $1;
		if (!defined $entry_id_line) {
			# the first synonym line of an entry is its id
			$entry_id_line = {line => $line, previous => [@previous_lines]};
		}
		else {
			push @{$entries{$lc}}, {line => $line, previous => [@previous_lines]};
		}
		@previous_lines = ();
	}
	# property
	elsif ($line =~ /^(\w+):(\w+):(.*)$/) {
		my $prop = $1;
		my $lc = $2;
		push @{$props{"$prop:$lc"}}, {line => $line, previous => [@previous_lines]};
		@previous_lines = ();
	}
	# comments or undefined
	else {
		push @previous_lines, $line;
	}
}

# non zero exit status only in check mode, when at least one entry is not sorted
exit(($is_check and $has_changes) ? 1 : 0);
43 changes: 43 additions & 0 deletions scripts/taxonomies/sort_each_taxonomy_entry.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/env bash
#
# Run sort_each_taxonomy_entry.pl (located next to this script) on each of the
# given taxonomy files.
#
# Usage: sort_each_taxonomy_entry.sh [options] taxonomy_file...
#
# Arguments starting with "-" are options, forwarded as is to the perl script,
# all other arguments are taxonomy files.
# Without --check, each file is replaced by the output of the perl script.
# With --check, files are left untouched and only the exit status
# of the perl script is used.
#
# Exit status: 0 if all went well, otherwise the status of the last failure.

ARGS=();
FILES=();
# explicitly initialized so that a value inherited from the environment is ignored
IS_CHECK="";

ACTION="Linting"
# options are passed as arguments to the script
for arg in "$@"
do
    if [[ "$arg" = -* ]]
    then
        ARGS+=( "$arg" );
    else
        FILES+=( "$arg" );
    fi
    if [[ "$arg" = "--check" ]]
    then
        IS_CHECK=1;
        ACTION="Checking"
    fi
done

script="$(dirname "$0")/sort_each_taxonomy_entry.pl"
FINAL_EXIT=0;
for taxonomy in "${FILES[@]}"
do
    echo "$ACTION $taxonomy ==============="
    rm -f "$taxonomy.tmp"
    if [[ -n "$IS_CHECK" ]]
    then
        # checking: no output redirection, the status of the script is the result
        "$script" "${ARGS[@]}" <"$taxonomy"
        EXIT=$?
    else
        # linting: write to a temporary file first
        "$script" "${ARGS[@]}" <"$taxonomy" >"$taxonomy.tmp"
        EXIT=$?
        # replace the taxonomy only if the script succeeded and produced something,
        # so that a failed run never overwrites it with a partial output
        if [[ $EXIT -eq 0 && -s "$taxonomy.tmp" ]]
        then
            mv "$taxonomy.tmp" "$taxonomy"
            EXIT=$?
        else
            rm -f "$taxonomy.tmp"
        fi
    fi
    if [[ $EXIT -ne 0 ]]
    then
        echo "=> Error in $taxonomy"
        FINAL_EXIT=$EXIT;
    fi
done

exit $FINAL_EXIT;

0 comments on commit fee8d3d

Please sign in to comment.