Skip to content

Commit

Permalink
Add a remote command for batch duplicate finding.
Browse files Browse the repository at this point in the history
  • Loading branch information
porridge committed Dec 2, 2024
1 parent c74ba34 commit acbb426
Show file tree
Hide file tree
Showing 7 changed files with 239 additions and 0 deletions.
114 changes: 114 additions & 0 deletions src/command-line-handling.cc
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@

#include "command-line-handling.h"

#include <map>
#include <string.h>
#include <sys/wait.h>
#include <vector>

#include "cache-maint.h"
Expand All @@ -40,6 +43,7 @@
#include "main-defines.h"
#include "main.h"
#include "misc.h"
#include "pic_equiv.h"
#include "pixbuf-renderer.h"
#include "rcfile.h"
#include "secure-save.h"
Expand Down Expand Up @@ -407,6 +411,113 @@ void gq_delay(GtkApplication *, GApplicationCommandLine *app_command_line, GVari
options->slideshow.delay = static_cast<gint>(n * 10.0 + 0.01);
}

/**
 * @brief Handles the "duplicates-threshold" option.
 *
 * Reads the integer option value from the options dictionary and stores it in
 * options->duplicates_similarity_threshold. When the option is absent the
 * threshold is set to 99. A value outside 0..100 is reported on the terminal
 * and leaves the stored threshold untouched.
 *
 * @param app_command_line command line the request came from
 * @param command_line_options_dict dictionary holding the parsed options
 */
static void gq_duplicates_threshold(GtkApplication *, GApplicationCommandLine *app_command_line, GVariantDict *command_line_options_dict, GList *)
{
	const gint thresh_min = 0;
	const gint thresh_max = 100;
	gint requested = 0;

	if (!g_variant_dict_lookup(command_line_options_dict, "duplicates-threshold", "i", &requested))
		{
		/* Option not supplied: fall back to the fixed default. */
		requested = 99;
		}
	else if (requested < thresh_min || requested > thresh_max)
		{
		printf_term(TRUE, "Image similarity threshold " BOLD_ON "%d" BOLD_OFF " out of range (%d to %d)\n", requested, thresh_min, thresh_max);
		return;
		}

	options->duplicates_similarity_threshold = static_cast<guint>(requested);
	DEBUG_1("threshold set to %d", options->duplicates_similarity_threshold);
}

/**
 * @brief Handles the "duplicates-program" option.
 *
 * Stores a copy of the option's string value in options->duplicates_program,
 * freeing the previous value. When the option is missing from the dictionary,
 * or is an empty string, the current setting is kept: replacing it with
 * nullptr would later hand a null program name to the exec call made by the
 * duplicate processing.
 *
 * @param command_line_options_dict dictionary holding the parsed options
 */
void gq_duplicates_program(GtkApplication *, GApplicationCommandLine *, GVariantDict *command_line_options_dict, GList *)
{
	/* With the "&s" format the string is not copied, so it must not be freed here. */
	const gchar *text = nullptr;

	const gboolean found = g_variant_dict_lookup(command_line_options_dict, "duplicates-program", "&s", &text);
	if (!found || !text || text[0] == '\0')
		{
		DEBUG_1("no duplicates program given, keeping current setting");
		return;
		}

	g_free(options->duplicates_program);
	options->duplicates_program = g_strdup(text);
	DEBUG_1("duplicates program set to \"%s\"", options->duplicates_program);
}

void gq_process_duplicates(GtkApplication *, GApplicationCommandLine *, GVariantDict *, GList *file_list)
{
std::map<std::string, std::unique_ptr<pic_equiv>> pics;
for (GList *work = file_list; work; work = work->next)
{
const char *fd = static_cast<const char *>(work->data);
std::string name(fd);
pics[name] = std::make_unique<pic_equiv>(fd);
}
DEBUG_1("processing %d files in set", pics.size());

// Compute similarity score for every pair, build equivalence sets.
for (auto outer_iter = pics.begin(); outer_iter != pics.end(); ++outer_iter)
{
auto inner_iter = outer_iter;
inner_iter++;
for (; inner_iter != pics.end(); ++inner_iter)
{
double similarity = outer_iter->second->compare(*inner_iter->second);
DEBUG_1("%s vs %s: %f", outer_iter->second->name.c_str(), inner_iter->second->name.c_str(), similarity);
if (similarity < options->duplicates_similarity_threshold)
continue;
outer_iter->second->equivalent.insert(inner_iter->second->equivalent.begin(), inner_iter->second->equivalent.end());
for (auto const &sibling : outer_iter->second->equivalent)
{
pics[sibling]->equivalent.insert(outer_iter->second->equivalent.begin(), outer_iter->second->equivalent.end());
}
}
}

std::set<std::string> processed;
for (auto const &pic : pics)
{
if (pic.second->equivalent.size() < 2)
// skip this pic if not similar to any other one but itself
continue;
if (processed.find(pic.second->name) != processed.end())
// skip this pic if it was already processed (when processing a similar image)
continue;
std::vector<const char *> cmd;
cmd.push_back(options->duplicates_program);
for (auto const &sibling : pic.second->equivalent)
{
cmd.push_back(sibling.c_str());
processed.insert(sibling);
}
cmd.push_back(nullptr);
pid_t pid = fork();
if (pid == -1)
{
log_printf("failed creating child process: %s\n", strerror(errno));
return;
}
else if (pid == 0)
{
execvp(const_cast<char *>(cmd[0]), const_cast<char **>(&(cmd[0])));
perror("execv");
exit(1);
}
else
{
int status;
wait(&status);
if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
{
log_printf("subprocess failed, aborting further duplicate processing\n");
return;
}
}
}
}

void file_load_no_raise(const gchar *text, GApplicationCommandLine *app_command_line)
{
gchar *filename;
Expand Down Expand Up @@ -1452,6 +1563,8 @@ CommandLineOptionEntry command_line_options[] =
{ "debug", gq_debug, PRIMARY_REMOTE, GUI },
#endif
{ "delay", gq_delay, PRIMARY_REMOTE, GUI },
{ "duplicates-program", gq_duplicates_program, PRIMARY_REMOTE, GUI },
{ "duplicates-threshold", gq_duplicates_threshold, PRIMARY_REMOTE, GUI },
{ "file", gq_file, PRIMARY_REMOTE, GUI },
{ "File", gq_File, PRIMARY_REMOTE, GUI },
{ "first", gq_first, PRIMARY_REMOTE, GUI },
Expand All @@ -1477,6 +1590,7 @@ CommandLineOptionEntry command_line_options[] =
{ "new-window", gq_new_window, PRIMARY_REMOTE, GUI },
{ "next", gq_next, PRIMARY_REMOTE, GUI },
{ "pixel-info", gq_pixel_info, REMOTE , N_A },
{ "process-duplicates", gq_process_duplicates, PRIMARY_REMOTE, TEXT },
{ "quit", gq_quit, PRIMARY_REMOTE, GUI },
{ "raise", gq_raise, PRIMARY_REMOTE, GUI },
{ "selection-add", gq_selection_add, REMOTE , N_A },
Expand Down
3 changes: 3 additions & 0 deletions src/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,8 @@ GOptionEntry command_line_options[] =
{ "debug" , 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_INT , nullptr, _("turn on debug output") , "[level]" },
#endif
{ "delay" , 'd', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, nullptr, _("set slide show delay to Hrs Mins N.M seconds,") , "<[H:][M:][N][.M]>" },
{ "duplicates-program" , 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, nullptr, _("run program with each identified set of duplicate images, by default 'echo'") , "<PROGRAM>" },
{ "duplicates-threshold" , 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_INT, nullptr, _("set similarity threshold (0-100) for what is considered a duplicate") , "<N>" },
{ "file" , 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, nullptr, _("open FILE or URL bring Geeqie window to the top") , "<FILE>|<URL>" },
{ "File" , 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, nullptr, _("open FILE or URL do not bring Geeqie window to the top") , "<FILE>|<URL>" },
{ "first" , 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE , nullptr, _("first image") , nullptr },
Expand Down Expand Up @@ -174,6 +176,7 @@ GOptionEntry command_line_options[] =
{ "next" , 'n', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE , nullptr, _("next image") , nullptr },
{ "pixel-info" , 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE , nullptr, _("print pixel info of mouse pointer on current image") , nullptr },
{ "print0" , 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE , nullptr, _("terminate returned data with null character instead of newline") , nullptr },
{ "process-duplicates" , 'p', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE , nullptr, _("group duplicate pictures in current collection and process them") , nullptr },
{ "quit" , 'q', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE , nullptr, _("quit") , nullptr },
{ "raise" , 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE , nullptr, _("bring the Geeqie window to the top") , nullptr },
{ "selection-add" , 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, nullptr, _("adds the current file (or the specified file) to the current selection") ,"[<FILE>]" },
Expand Down
2 changes: 2 additions & 0 deletions src/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ main_sources = files('advanced-exif.cc',
'osd.cc',
'osd.h',
'pan-view.h',
'pic_equiv.cc',
'pic_equiv.h',
'pixbuf-renderer.cc',
'pixbuf-renderer.h',
'pixbuf-util.cc',
Expand Down
1 change: 1 addition & 0 deletions src/options.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ ConfOptions *init_options(ConfOptions *options)
options->dnd_icon_size = 48;
options->dnd_default_action = DND_ACTION_ASK;
options->duplicates_similarity_threshold = 99;
options->duplicates_program = g_strdup("echo");
options->rot_invariant_sim = TRUE;
options->sort_totals = FALSE;
options->rectangle_draw_aspect_ratio = RECTANGLE_DRAW_ASPECT_RATIO_NONE;
Expand Down
1 change: 1 addition & 0 deletions src/options.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ struct ConfOptions

guint duplicates_similarity_threshold;
guint duplicates_match;
gchar *duplicates_program;
gboolean duplicates_thumbnails;
guint duplicates_select_type;
gboolean rot_invariant_sim;
Expand Down
68 changes: 68 additions & 0 deletions src/pic_equiv.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright (C) 2024 The Geeqie Team
*
* Author: Marcin Owsiany <[email protected]>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
*
* Helper class for computing equivalence sets of pictures.
*
*/

#include "pic_equiv.h"

/**
 * @brief creates the representation of one picture file
 *
 * The equivalence set starts out containing only the picture's own path, and
 * the similarity data is obtained from load_image_sim().
 *
 * @param cname path to picture file to represent
 */
pic_equiv::pic_equiv(char const *cname)
	: name(cname)
	, equivalent{name}
	, sim(load_image_sim(cname))
{
}

/**
 * @brief loads a picture file and computes its similarity data
 * @param cname path to the picture file
 * @returns the result of image_sim_new_from_pixbuf() for the loaded picture,
 *          or nullptr (after printing a message to stderr) if the file could not be loaded
 */
ImageSimilarityData *pic_equiv::load_image_sim(const char *cname)
{
	g_autoptr(GError) error = nullptr;
	g_autoptr(GdkPixbuf) buf = gdk_pixbuf_new_from_file(cname, &error);

	/* Decide on the returned pixbuf rather than on the error: a null pixbuf must
	 * never be passed on, even if no error was reported with it. */
	if (!buf)
		{
		fprintf(stderr, "Unable to read file %s: %s\n", cname, error ? error->message : "unknown error");
		return nullptr;
		}

	return image_sim_new_from_pixbuf(buf);
}

/**
 * @brief frees the similarity data, if any was loaded for this picture
 */
pic_equiv::~pic_equiv()
{
	if (!sim) return;

	image_sim_free(sim);
}

/**
 * @brief orders two pic_equiv objects by the paths they represent
 * @returns non-zero if the path of a sorts before the path of b, zero otherwise
 */
int operator<(const pic_equiv &a, const pic_equiv &b)
{
	const bool a_sorts_first = a.name.compare(b.name) < 0;
	return a_sorts_first;
}

/**
 * @brief compares this pic_equiv object to another
 * @param other object to compare to
 * @returns the result of image_sim_compare() multiplied by 100, intended as a
 *          visual similarity score between 0 and 100; 0.0 if either object
 *          has no similarity data
 */
gdouble pic_equiv::compare(const pic_equiv &other)
{
	const bool both_loaded = sim && other.sim;
	if (!both_loaded) return 0.0;

	const gdouble fraction = image_sim_compare(sim, other.sim);
	return 100.0 * fraction;
}
50 changes: 50 additions & 0 deletions src/pic_equiv.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
/*
* Copyright (C) 2024 The Geeqie Team
*
* Author: Marcin Owsiany <[email protected]>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*
*
* Helper class for computing equivalence sets of pictures.
*
*/

#ifndef PIC_EQUIV_H
#define PIC_EQUIV_H

#include <set>
#include <string>

#include <gdk-pixbuf/gdk-pixbuf.h>
#include <glib/gtypes.h>

#include "similar.h"

/**
 * @class pic_equiv
 * @brief holds a picture's similarity data, as well as its equivalence set, and allows comparing pictures.
 *
 * Objects cannot be copied, because each one frees its similarity data in its
 * destructor.
 */
class pic_equiv {
public:
	/** @param cname path to picture file to represent */
	explicit pic_equiv(char const *cname);
	~pic_equiv();
	pic_equiv(const pic_equiv& other) = delete;
	pic_equiv& operator=(const pic_equiv& other) = delete;
	/** @returns similarity score of this picture and the given one */
	gdouble compare(const pic_equiv&);
	/** path of the picture file, as passed to the constructor */
	std::string name;
	/** paths of the pictures considered equivalent to this one; initialised to contain its own path */
	std::set<std::string> equivalent;
private:
	/** similarity data of the picture, or nullptr if it could not be loaded; freed by the destructor */
	ImageSimilarityData *sim;
	static ImageSimilarityData *load_image_sim(const char *cname);
	friend int operator<(const pic_equiv &a, const pic_equiv &b);
};

#endif /* PIC_EQUIV_H */

0 comments on commit acbb426

Please sign in to comment.