Skip to content

Commit

Permalink
(#385) (#397) Remove the built-in Auto-Tag feature in favor of a File…
Browse files Browse the repository at this point in the history
…name parsing plugin
  • Loading branch information
Difegue committed Feb 13, 2021
1 parent 6e8f6e8 commit 1abce98
Show file tree
Hide file tree
Showing 9 changed files with 143 additions and 125 deletions.
4 changes: 1 addition & 3 deletions lib/LANraragi/Controller/Config.pm
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ sub index {
nofunmode => $self->LRR_CONF->enable_nofun,
apikey => $self->LRR_CONF->get_apikey,
enablecors => $self->LRR_CONF->enable_cors,
tagregex => $self->LRR_CONF->get_tagregex,
enableresize => $self->LRR_CONF->enable_resize,
sizethreshold => $self->LRR_CONF->get_threshold,
readerquality => $self->LRR_CONF->get_readquality,
Expand Down Expand Up @@ -70,8 +69,7 @@ sub save_config {
devmode => ( scalar $self->req->param('devmode') ? '1' : '0' ),
enableresize => ( scalar $self->req->param('enableresize') ? '1' : '0' ),
blackliston => ( scalar $self->req->param('blackliston') ? '1' : '0' ),
nofunmode => ( scalar $self->req->param('nofunmode') ? '1' : '0' ),
tagregex => ( scalar $self->req->param('tagregex') ? '1' : '0' )
nofunmode => ( scalar $self->req->param('nofunmode') ? '1' : '0' )
);

#only add newpassword field as password if enablepass = 1
Expand Down
2 changes: 1 addition & 1 deletion lib/LANraragi/Model/Archive.pm
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ sub find_untagged_archives {
remove_spaces($t);
remove_newlines($t);

# the following are the only namespaces that LANraragi::Utils::Database::parse_name adds
# The following are basic and therefore don't count as "tagged"
# date_added added for convenience as running the matching plugin doesn't really count as tagging
$nondefaulttags += 1 unless $t =~ /(artist|parody|series|language|event|group|date_added):.*/;
}
Expand Down
17 changes: 0 additions & 17 deletions lib/LANraragi/Model/Config.pm
Original file line number Diff line number Diff line change
Expand Up @@ -126,26 +126,9 @@ sub enable_nofun { return &get_redis_conf( "nofunmode", "0" ) }
# One-line configuration accessors.
# Each sub forwards a configuration key and a second string argument to get_redis_conf
# and returns whatever that call returns.
# NOTE(review): the second argument looks like the fallback used when the key is unset;
# get_redis_conf is not visible here -- confirm before relying on it.
# NOTE(review): on/off style settings are passed around as the strings "1" and "0".
sub enable_autotag { return &get_redis_conf( "autotag", "1" ) }
sub enable_cors { return &get_redis_conf( "enablecors", "0" ) }
sub get_apikey { return &get_redis_conf( "apikey", "" ) }
sub get_tagregex { return &get_redis_conf( "tagregex", "1" ) }
sub enable_blacklst { return &get_redis_conf( "blackliston", "1" ) }
sub enable_resize { return &get_redis_conf( "enableresize", "0" ) }
sub get_threshold { return &get_redis_conf( "sizethreshold", "1000" ) }
sub get_readquality { return &get_redis_conf( "readerquality", "50" ) }

#Regular Expression matching the E-Hentai standard: (Release) [Artist] TITLE (Series) [Language]
#Used in parsing.
#Stuff that's between unescaped ()s is put in a numbered variable: $1,$2,etc
#Parsing is only done the first time the file is found. The parsed info is then stored into Redis.
#Change this regex if you wish to use a different parsing for mass-addition of archives.
#Each bracketed field has an outer group (brackets included) and an inner group (content only).
#LANraragi::Utils::Database::parse_name reads the inner groups by number:
#  $2 = Release/event, $4 = Artist, $5 = Title, $7 = Series, $9 = Language.
#Keep that numbering intact if you edit the pattern.

#()? indicates the field is optional.
#(\(([^([]+)\))? returns the content of (Release). Optional.
#(\[([^]]+)\])? returns the content of [Artist]. Optional.
#([^([]+) returns the title. Mandatory.
#  The title class only excludes ( and [, so it also swallows whitespace that precedes the next field.
#(\(([^([)]+)\))? returns the content of (Series). Optional.
#(\[([^]]+)\])? returns the content of [Language]. Optional.
#\s* indicates zero or more whitespaces.
#The pattern is not anchored, so it may match in the middle of a string.
my $regex = qr/(\(([^([]+)\))?\s*(\[([^]]+)\])?\s*([^([]+)\s*(\(([^([)]+)\))?\s*(\[([^]]+)\])?/;

#get_regex()
#Returns the precompiled pattern above.
sub get_regex { return $regex }
1;
7 changes: 4 additions & 3 deletions lib/LANraragi/Model/Plugins.pm
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ sub exec_enabled_plugins_on_file {
my $successes = 0;
my $failures = 0;
my $addedtags = 0;
my $newtitle = "";

my @plugins = LANraragi::Utils::Plugins::get_enabled_plugins("metadata");

Expand Down Expand Up @@ -64,13 +65,13 @@ sub exec_enabled_plugins_on_file {
if ( exists $plugin_result{title} ) {
LANraragi::Utils::Database::set_title( $id, $plugin_result{title} );

# Increment added_tags if the title changed as well
$addedtags++;
$newtitle = $plugin_result{title};
$logger->debug("Changing title to $newtitle.");
}
}
}

return ( $successes, $failures, $addedtags );
return ( $successes, $failures, $addedtags, $newtitle );
}

# Unlike the two other methods, exec_login_plugin takes a plugin name and does the Redis lookup itself.
Expand Down
24 changes: 11 additions & 13 deletions lib/LANraragi/Model/Upload.pm
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ use LANraragi::Model::Category;
# Returns a status value, the ID and title of the file, and a status message.
sub handle_incoming_file {

my ( $tempfile, $catid, $extratags ) = @_;
my ( $filename, $dirs, $suffix ) = fileparse( $tempfile, qr/\.[^.]*/ );
my ( $tempfile, $catid, $tags ) = @_;
my ( $filename, $dirs, $suffix ) = fileparse( $tempfile, qr/\.[^.]*/ );
$filename = $filename . $suffix;
my $logger = get_logger( "File Upload/Download", "lanraragi" );

Expand Down Expand Up @@ -71,16 +71,10 @@ sub handle_incoming_file {

# Add the file to the database ourselves so Shinobu doesn't do it
# This allows autoplugin to be ran ASAP.
my ( $name, $title, $tags ) = LANraragi::Utils::Database::add_archive_to_redis( $id, $output_file, $redis );
my $name = LANraragi::Utils::Database::add_archive_to_redis( $id, $output_file, $redis );

# If additional tags were given to the sub, add them now.
if ($extratags) {

if ( $tags ne "" ) {
$tags = $tags . ", ";
}

$tags = $tags . $extratags;
if ($tags) {
$redis->hset( $id, "tags", encode_utf8($tags) );
}

Expand All @@ -94,16 +88,20 @@ sub handle_incoming_file {
move( $output_file . ".upload", $output_file );

unless ( -e $output_file ) {
return ( 0, $id, $title, "The file couldn't be moved to your content folder!" );
return ( 0, $id, $name, "The file couldn't be moved to your content folder!" );
}

my $successmsg = "File added successfully!";

if ( LANraragi::Model::Config->enable_autotag ) {
$logger->debug("Running autoplugin on newly uploaded file $id...");

my ( $succ, $fail, $addedtags ) = LANraragi::Model::Plugins::exec_enabled_plugins_on_file($id);
my ( $succ, $fail, $addedtags, $newtitle ) = LANraragi::Model::Plugins::exec_enabled_plugins_on_file($id);
$successmsg = "$succ Plugins used successfully, $fail Plugins failed, $addedtags tags added. ";

if ( $newtitle ne "" ) {
$name = $newtitle;
}
}

if ($catid) {
Expand All @@ -122,7 +120,7 @@ sub handle_incoming_file {
# Invalidate search cache ourselves, Shinobu won't do it since the file is already in the database
invalidate_cache();

return ( 1, $id, $title, $successmsg );
return ( 1, $id, $name, $successmsg );
}

# Download the given URL, using the given Mojo::UserAgent object.
Expand Down
6 changes: 1 addition & 5 deletions lib/LANraragi/Plugin/Metadata/Eze.pm
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,7 @@ sub tags_from_eze_json {
my $tags = $hash->{"gallery_info"}->{"tags"};

# Titles returned by eze are in complete E-H notation.
# We use the parse_name routine used by Auto-Tag to get the title.
my $ogtitle = $hash->{"gallery_info"}->{"title"};

my ( $title, $autotags ) = LANraragi::Utils::Database::parse_name($ogtitle);
my $title = $hash->{"gallery_info"}->{"title"};
remove_spaces($title);

foreach my $namespace ( sort keys %$tags ) {
Expand All @@ -113,7 +110,6 @@ sub tags_from_eze_json {
}

# Add source tag if possible

my $site = $hash->{"gallery_info"}->{"source"}->{"site"};
my $gid = $hash->{"gallery_info"}->{"source"}->{"gid"};
my $gtoken = $hash->{"gallery_info"}->{"source"}->{"token"};
Expand Down
122 changes: 122 additions & 0 deletions lib/LANraragi/Plugin/Metadata/RegexParse.pm
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
package LANraragi::Plugin::Metadata::RegexParse;

use strict;
use warnings;

#Plugins can freely use all Perl packages already installed on the system
#Try however to restrain yourself to the ones already installed for LRR (see tools/cpanfile) to avoid extra installations by the end-user.
use Mojo::JSON qw(from_json);
use File::Basename;

#You can also use the LRR Internal API when fitting.
use LANraragi::Model::Plugins;
use LANraragi::Utils::Database;
use LANraragi::Utils::Logging qw(get_logger);
use LANraragi::Utils::Generic qw(remove_spaces);
use LANraragi::Utils::Archive qw(is_file_in_archive extract_file_from_archive);

#Meta-information about your plugin.
#Returns a flat key/value list (suitable for assignment to a hash) describing this plugin.
#Takes no arguments and has no side effects.
sub plugin_info {

return (
#Standard metadata
name => "Filename Parsing",
type => "metadata",
namespace => "regexplugin",
author => "Difegue",
version => "1.0",
description =>
"Derive tags from the filename of the given archive. Follows the doujinshi naming standard (Release) [Artist] TITLE (Series) [Language].",
#Icon is a PNG embedded as a base64 data URI.
icon =>
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABQAAAAUCAYAAACNiR0NAAAAAXNSR0IArs4c6QAAAL1JREFUOI1jZMABpNbH/sclx8DAwPAscDEjNnEMQUIGETIYhUOqYdgMhTPINQzdUEZqGIZsKBM1DEIGTOiuexqwCKdidDl0vtT62P9kuZCJEWuKYWBgYGBgRHbh04BFDNIb4jAUbbSrZTARUkURg6lD10OUC/0PNaMYgs1Skgwk1jCSDCQWoBg46dYmhite0+D8pwGLCMY6uotRDOy8toZBkI2HIhcO/pxCm8KBUkOxFl/kGoq3gCXFYFxVAACeoU/8xSNybwAAAABJRU5ErkJggg==",
#A single boolean option; get_tags reads it as its first plugin parameter ($savetitle).
parameters => [ { type => "bool", desc => "Save archive title" } ]
);

}

#Mandatory function to be implemented by your plugin
#get_tags(invocant, lrr_info, savetitle)
#Derives tags, and optionally a title, from the filename of an archive.
#  lrr_info:  hash reference; only its file_path entry is read here.
#  savetitle: plugin parameter; when true, the parsed title is returned as well.
#Returns a key/value list: ( tags => "ns:value, ..." ), plus ( title => ... ) when
#savetitle is set and a non-empty title was parsed.
sub get_tags {

    shift;                        # First argument is not used by this plugin
    my $lrr_info = shift;         # Global info hash
    my ($savetitle) = @_;         # Plugin parameters

    my $logger = get_logger( "regexparse", "plugins" );
    my $file   = $lrr_info->{file_path};

    # Get the filename from the file_path info field.
    # fileparse also returns the directory and suffix; only the base name is needed.
    my ($filename) = fileparse( $file, qr/\.[^.]*/ );

    my ( $event, $artist, $title, $series, $language ) = ( "", "", "", "", "" );

    #Replace underscores with spaces
    $filename =~ s/_/ /g;

    # Only read the capture variables when the match succeeded: after a failed match
    # $1..$9 still hold whatever an earlier successful match left behind.
    # get_regex is called with explicit parentheses so the current @_ is not forwarded.
    if ( $filename =~ get_regex() ) {
        $event    = $2 if defined $2;
        $artist   = $4 if defined $4;
        $title    = $5 if defined $5;
        $series   = $7 if defined $7;
        $language = $9 if defined $9;
    }

    # The title group also captures the whitespace that precedes the next field;
    # remove_spaces edits its argument in place (presumably trimming the ends -- confirm).
    remove_spaces($title);

    my @tags = ();

    if ( $event ne "" ) {
        push @tags, "event:$event";
    }

    if ( $artist ne "" ) {

        #Special case for circle/artist sets:
        #If the string contains parenthesis, what's inside those is the artist name
        #the rest is the circle.
        if ( $artist =~ /(.*) \((.*)\)/ ) {
            push @tags, "group:$1";
            push @tags, "artist:$2";
        } else {
            push @tags, "artist:$artist";
        }
    }

    if ( $series ne "" ) {
        push @tags, "series:$series";
    }

    if ( $language ne "" ) {
        push @tags, "language:$language";
    }

    my $tagstring = join( ", ", @tags );

    $logger->info("Sending the following tags to LRR: $tagstring");

    # Never hand back an empty title: the caller would overwrite the stored title with it.
    if ( $savetitle && $title ne "" ) {
        $logger->info("Parsed title is $title");
        return ( tags => $tagstring, title => $title );
    }

    return ( tags => $tagstring );

}

#Regular Expression matching the E-Hentai standard: (Release) [Artist] TITLE (Series) [Language]
#Used by get_tags to split a filename into its metadata fields.
#Stuff that's between unescaped ()s is put in a numbered variable: $1,$2,etc
#Each bracketed field has an outer group (brackets included) and an inner group (content only).
#get_tags reads the inner groups by number:
#  $2 = Release/event, $4 = Artist, $5 = Title, $7 = Series, $9 = Language.
#Change this regex if you wish to use a different parsing scheme, but keep that numbering intact.

#()? indicates the field is optional.
#(\(([^([]+)\))? returns the content of (Release). Optional.
#(\[([^]]+)\])? returns the content of [Artist]. Optional.
#([^([]+) returns the title. Mandatory.
#  The title class only excludes ( and [, so it also swallows whitespace that precedes the next field.
#(\(([^([)]+)\))? returns the content of (Series). Optional.
#(\[([^]]+)\])? returns the content of [Language]. Optional.
#\s* indicates zero or more whitespaces.
#The pattern is not anchored, so it may match in the middle of a filename.
my $regex = qr/(\(([^([]+)\))?\s*(\[([^]]+)\])?\s*([^([]+)\s*(\(([^([)]+)\))?\s*(\[([^]]+)\])?/;

#get_regex()
#Returns the precompiled pattern above.
sub get_regex { return $regex }
1;
71 changes: 3 additions & 68 deletions lib/LANraragi/Utils/Database.pm
Original file line number Diff line number Diff line change
Expand Up @@ -31,30 +31,17 @@ sub add_archive_to_redis {
$logger->debug("File Name: $name");
$logger->debug("Filesystem Path: $file");

my $title = $name;
my $tags = "";

$redis->hset( $id, "name", encode_utf8($name) );
$redis->hset( $id, "name", encode_utf8($name) );
$redis->hset( $id, "title", encode_utf8($name) );

#Don't encode filenames.
$redis->hset( $id, "file", $file );

#New file in collection, so this flag is set.
$redis->hset( $id, "isnew", "true" );

#Use the mythical regex to get title and tags
#Except if the matching pref is off
if ( LANraragi::Model::Config->get_tagregex eq "1" ) {
( $title, $tags ) = parse_name($name);
$logger->debug("Parsed Title: $title");
$logger->debug("Parsed Tags: $tags");
}

$redis->hset( $id, "title", encode_utf8($title) );
$redis->hset( $id, "tags", encode_utf8($tags) );
$redis->quit;

return ( $name, $title, $tags );
return $name;
}

# build_archive_JSON(redis, id)
Expand Down Expand Up @@ -211,58 +198,6 @@ sub set_title {
$redis->quit;
}

#parse_name(name)
#Parses an archive name with the regex returned by LANraragi::Model::Config->get_regex to find metadata.
#Returns ( title, tagstring ), where tagstring is a ", "-joined list of namespace:value tags.
#Both are empty strings when the name does not match the regex.
#NOTE: underscores are replaced with spaces directly in $_[0], so the caller's variable is modified.
#      This is kept as-is because callers may rely on it.
sub parse_name {

    my ( $event, $artist, $title, $series, $language ) = ( "", "", "", "", "" );

    #Replace underscores with spaces (in place, see NOTE above)
    $_[0] =~ s/_/ /g;

    # Only read the capture variables when the match succeeded: after a failed match
    # $1..$9 still hold whatever an earlier successful match left behind.
    if ( $_[0] =~ LANraragi::Model::Config->get_regex() ) {
        $event    = $2 if defined $2;
        $artist   = $4 if defined $4;
        $title    = $5 if defined $5;
        $series   = $7 if defined $7;
        $language = $9 if defined $9;
    }

    my @tags = ();

    if ( $event ne "" ) {
        push @tags, "event:$event";
    }

    if ( $artist ne "" ) {

        #Special case for circle/artist sets:
        #If the string contains parenthesis, what's inside those is the artist name
        #the rest is the circle.
        if ( $artist =~ /(.*) \((.*)\)/ ) {
            push @tags, "group:$1";
            push @tags, "artist:$2";
        } else {
            push @tags, "artist:$artist";
        }
    }

    if ( $series ne "" ) {
        push @tags, "series:$series";
    }

    if ( $language ne "" ) {
        push @tags, "language:$language";
    }

    my $tagstring = join( ", ", @tags );

    # The title is returned exactly as captured (it may carry trailing whitespace).
    return ( $title, $tagstring );
}

#This function is used for all ID computation in LRR.
#Takes the path to the file as an argument.
sub compute_id {
Expand Down
15 changes: 0 additions & 15 deletions templates/templates_config/config_tags.html.tt2
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,6 @@

<tr></tr>

<tr>
<td class="option-td">
<h2 class="ih"> Auto-Tag </h2>
</td>
<td>
[% IF tagregex %]
<input id="tagregex" name="tagregex" class="fa" type="checkbox" checked> [% ELSE %]
<input id="tagregex" name="tagregex" class="fa" type="checkbox"> [% END %]
<label for="tagregex">
<br>If this option is on, we'll try to derive tags from the filename of uploaded archives.
<br>Consider disabling it if you often upload archives that don't follow the doujinshi naming standard.
</label>
</td>
</tr>

<tr>
<td class="option-td">
<h2 class="ih"> Auto-Plugin </h2>
Expand Down

0 comments on commit 1abce98

Please sign in to comment.