From acbb426e49dd4870817c2e9afd7e691d40edbe48 Mon Sep 17 00:00:00 2001
From: Marcin Owsiany
Date: Fri, 27 Sep 2024 19:56:19 +0200
Subject: [PATCH] Add a remote command for batch duplicate finding.

Based on https://github.com/porridge/image-duplicate-finder
---
Review notes (below the "---" marker, so not part of the commit message):
 * gq_duplicates_program keeps the current program when the option value is
   missing or empty, instead of storing a null pointer that execvp would use.
 * gq_process_duplicates waits with waitpid() for its own child, leaves the
   child with _exit(), and builds each merged set separately before copying it.
 * pic_equiv.h has an include guard; compare() is const; operator< returns bool.
 * NOTE(review): context lines are reproduced as found in the text under
   review; the bare "#include" and "static_cast(" look truncated - confirm
   against upstream before applying. Blob ids on the "index" lines are those
   of the original patch and do not describe the revised contents.

 src/command-line-handling.cc | 208 +++++++++++++++++++++++++++++++++++
 src/main.cc                  |   3 +
 src/meson.build              |   2 +
 src/options.cc               |   1 +
 src/options.h                |   1 +
 src/pic_equiv.cc             |  81 ++++++++++++++
 src/pic_equiv.h              |  55 +++++++++
 7 files changed, 351 insertions(+)
 create mode 100644 src/pic_equiv.cc
 create mode 100644 src/pic_equiv.h

diff --git a/src/command-line-handling.cc b/src/command-line-handling.cc
index 6a280339..ee6a89f4 100644
--- a/src/command-line-handling.cc
+++ b/src/command-line-handling.cc
@@ -20,6 +20,15 @@
 
 #include "command-line-handling.h"
 
+#include <sys/wait.h>
+#include <unistd.h>
+#include <cerrno>
+#include <cstring>
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
 #include
 
 #include "cache-maint.h"
@@ -40,6 +49,7 @@
 #include "main-defines.h"
 #include "main.h"
 #include "misc.h"
+#include "pic_equiv.h"
 #include "pixbuf-renderer.h"
 #include "rcfile.h"
 #include "secure-save.h"
@@ -407,6 +417,201 @@ void gq_delay(GtkApplication *, GApplicationCommandLine *app_command_line, GVari
 	options->slideshow.delay = static_cast(n * 10.0 + 0.01);
 }
 
+/**
+ * @brief Sets the similarity threshold (0-100) used when grouping duplicates.
+ *
+ * Looks up the integer "duplicates-threshold" entry. An out-of-range value is
+ * reported and the current setting is kept; when the entry is absent the
+ * threshold is reset to 99.
+ */
+static void gq_duplicates_threshold(GtkApplication *, GApplicationCommandLine *app_command_line, GVariantDict *command_line_options_dict, GList *)
+{
+	constexpr gint thresh_min = 0;
+	constexpr gint thresh_max = 100;
+	constexpr gint thresh_default = 99;
+	gint thresh = thresh_default;
+
+	if (g_variant_dict_lookup(command_line_options_dict, "duplicates-threshold", "i", &thresh))
+	{
+		if (thresh < thresh_min || thresh > thresh_max)
+		{
+			printf_term(TRUE, "Image similarity threshold " BOLD_ON "%d" BOLD_OFF " out of range (%d to %d)\n", thresh, thresh_min, thresh_max);
+			return;
+		}
+	}
+	else
+	{
+		thresh = thresh_default;
+	}
+
+	options->duplicates_similarity_threshold = static_cast<guint>(thresh);
+	DEBUG_1("threshold set to %u", options->duplicates_similarity_threshold);
+}
+
+/**
+ * @brief Sets the program run for each group of duplicates.
+ *
+ * The previous program is kept when the "duplicates-program" entry is missing
+ * or empty, so that a later --process-duplicates never execs a null path.
+ */
+void gq_duplicates_program(GtkApplication *, GApplicationCommandLine *, GVariantDict *command_line_options_dict, GList *)
+{
+	const gchar *text = nullptr;
+
+	if (!g_variant_dict_lookup(command_line_options_dict, "duplicates-program", "&s", &text) || !text || !*text)
+	{
+		log_printf("no duplicates program given, keeping \"%s\"\n", options->duplicates_program ? options->duplicates_program : "");
+		return;
+	}
+
+	g_free(options->duplicates_program);
+	options->duplicates_program = g_strdup(text);
+	DEBUG_1("duplicates program set to \"%s\"", options->duplicates_program);
+}
+
+/** Maps a file path to the object holding its similarity data and equivalence set. */
+using DuplicatePicMap = std::map<std::string, std::unique_ptr<pic_equiv>>;
+
+/**
+ * @brief Joins the equivalence sets of two pictures found to be similar.
+ *
+ * The union is built in a separate set first, so no set is ever inserted into
+ * itself, and is then copied to every member of the joined group.
+ */
+static void duplicates_merge_sets(DuplicatePicMap &pics, const pic_equiv &a, const pic_equiv &b)
+{
+	std::set<std::string> merged = a.equivalent;
+	merged.insert(b.equivalent.begin(), b.equivalent.end());
+
+	for (const auto &member : merged)
+	{
+		const auto found = pics.find(member);
+		if (found != pics.end())
+		{
+			found->second->equivalent = merged;
+		}
+	}
+}
+
+/**
+ * @brief Runs options->duplicates_program with the given paths as arguments and waits for it.
+ * @param group paths passed to the program, one argument each
+ * @returns true if the program was started and exited with status 0
+ */
+static bool duplicates_run_program(const std::set<std::string> &group)
+{
+	if (!options->duplicates_program || !*options->duplicates_program)
+	{
+		log_printf("no duplicates program configured\n");
+		return false;
+	}
+
+	// Build argv before fork() so the child only has to call execvp().
+	std::vector<char *> argv;
+	argv.reserve(group.size() + 2);
+	argv.push_back(options->duplicates_program);
+	for (const auto &path : group)
+	{
+		argv.push_back(const_cast<char *>(path.c_str()));
+	}
+	argv.push_back(nullptr);
+
+	const pid_t pid = fork();
+	if (pid == -1)
+	{
+		log_printf("failed creating child process: %s\n", strerror(errno));
+		return false;
+	}
+
+	if (pid == 0)
+	{
+		execvp(argv[0], argv.data());
+		perror("execvp");
+		// _exit() skips the atexit handlers and stdio buffers inherited from the parent.
+		_exit(127);
+	}
+
+	// Wait for this child only; wait() could reap an unrelated child process.
+	int status = 0;
+	pid_t waited;
+	do
+	{
+		waited = waitpid(pid, &status, 0);
+	}
+	while (waited == -1 && errno == EINTR);
+
+	if (waited == -1)
+	{
+		log_printf("failed waiting for child process: %s\n", strerror(errno));
+		return false;
+	}
+
+	return WIFEXITED(status) && WEXITSTATUS(status) == 0;
+}
+
+/**
+ * @brief Groups similar pictures and runs the duplicates program once per group.
+ *
+ * Every pair of files is compared; pairs scoring at least
+ * options->duplicates_similarity_threshold end up in the same group. Processing
+ * stops at the first group for which the program fails.
+ *
+ * @param file_list list whose data pointers are used as file paths (C strings)
+ */
+void gq_process_duplicates(GtkApplication *, GApplicationCommandLine *, GVariantDict *, GList *file_list)
+{
+	DuplicatePicMap pics;
+	for (GList *work = file_list; work; work = work->next)
+	{
+		const auto *path = static_cast<const char *>(work->data);
+		if (!path) continue;
+
+		// Load each path once, even if it is listed several times.
+		if (pics.find(path) == pics.end())
+		{
+			pics.emplace(path, std::make_unique<pic_equiv>(path));
+		}
+	}
+	DEBUG_1("processing %zu files in set", pics.size());
+
+	// Compute similarity score for every pair, build equivalence sets.
+	for (auto outer = pics.begin(); outer != pics.end(); ++outer)
+	{
+		auto inner = outer;
+		for (++inner; inner != pics.end(); ++inner)
+		{
+			pic_equiv &a = *outer->second;
+			pic_equiv &b = *inner->second;
+
+			// Already in the same group: the comparison cannot change anything.
+			if (a.equivalent.count(b.name) != 0) continue;
+
+			const gdouble similarity = a.compare(b);
+			DEBUG_1("%s vs %s: %f", a.name.c_str(), b.name.c_str(), similarity);
+			if (similarity >= options->duplicates_similarity_threshold)
+			{
+				duplicates_merge_sets(pics, a, b);
+			}
+		}
+	}
+
+	std::set<std::string> processed;
+	for (const auto &entry : pics)
+	{
+		const pic_equiv &pic = *entry.second;
+
+		// Skip pictures similar to nothing but themselves, and groups already handled.
+		if (pic.equivalent.size() < 2 || processed.count(pic.name) != 0) continue;
+
+		processed.insert(pic.equivalent.begin(), pic.equivalent.end());
+		if (!duplicates_run_program(pic.equivalent))
+		{
+			log_printf("subprocess failed, aborting further duplicate processing\n");
+			return;
+		}
+	}
+}
+
 void file_load_no_raise(const gchar *text, GApplicationCommandLine *app_command_line)
 {
 	gchar *filename;
@@ -1452,6 +1657,8 @@ CommandLineOptionEntry command_line_options[] =
 	{ "debug", gq_debug, PRIMARY_REMOTE, GUI },
 #endif
 	{ "delay", gq_delay, PRIMARY_REMOTE, GUI },
+	{ "duplicates-program", gq_duplicates_program, PRIMARY_REMOTE, GUI },
+	{ "duplicates-threshold", gq_duplicates_threshold, PRIMARY_REMOTE, GUI },
 	{ "file", gq_file, PRIMARY_REMOTE, GUI },
 	{ "File", gq_File, PRIMARY_REMOTE, GUI },
 	{ "first", gq_first, PRIMARY_REMOTE, GUI },
@@ -1477,6 +1684,7 @@ CommandLineOptionEntry command_line_options[] =
 	{ "new-window", gq_new_window, PRIMARY_REMOTE, GUI },
 	{ "next", gq_next, PRIMARY_REMOTE, GUI },
 	{ "pixel-info", gq_pixel_info, REMOTE , N_A },
+	{ "process-duplicates", gq_process_duplicates, PRIMARY_REMOTE, TEXT },
 	{ "quit", gq_quit, PRIMARY_REMOTE, GUI },
 	{ "raise", gq_raise, PRIMARY_REMOTE, GUI },
 	{ "selection-add", gq_selection_add, REMOTE , N_A },
diff --git a/src/main.cc b/src/main.cc
index 7f63e049..78234d89 100644
--- a/src/main.cc
+++ b/src/main.cc
@@ -147,6 +147,8 @@ GOptionEntry command_line_options[] =
 	{ "debug" , 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_INT , nullptr, _("turn on debug output") , "[level]" },
 #endif
 	{ "delay" , 'd', G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, nullptr, _("set slide show delay to Hrs Mins N.M seconds,") , "<[H:][M:][N][.M]>" },
+	{ "duplicates-program" , 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, nullptr, _("run program with each identified set of duplicate images, by default 'echo'") , "<PROGRAM>" },
+	{ "duplicates-threshold" , 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_INT, nullptr, _("set similarity threshold (0-100) for what is considered a duplicate") , "<THRESHOLD>" },
 	{ "file" , 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, nullptr, _("open FILE or URL bring Geeqie window to the top") , "|" },
 	{ "File" , 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, nullptr, _("open FILE or URL do not bring Geeqie window to the top") , "|" },
 	{ "first" , 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE , nullptr, _("first image") , nullptr },
@@ -174,6 +176,7 @@ GOptionEntry command_line_options[] =
 	{ "next" , 'n', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE , nullptr, _("next image") , nullptr },
 	{ "pixel-info" , 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE , nullptr, _("print pixel info of mouse pointer on current image") , nullptr },
 	{ "print0" , 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE , nullptr, _("terminate returned data with null character instead of newline") , nullptr },
+	{ "process-duplicates" , 'p', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE , nullptr, _("group duplicate pictures in current collection and process them") , nullptr },
 	{ "quit" , 'q', G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE , nullptr, _("quit") , nullptr },
 	{ "raise" , 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_NONE , nullptr, _("bring the Geeqie window to the top") , nullptr },
 	{ "selection-add" , 0, G_OPTION_FLAG_NONE, G_OPTION_ARG_STRING, nullptr, _("adds the current file (or the specified file) to the current selection") ,"[]" },
diff --git a/src/meson.build b/src/meson.build
index 4de15e6b..89a9fab1 100644
--- a/src/meson.build
+++ b/src/meson.build
@@ -130,6 +130,8 @@ main_sources = files('advanced-exif.cc',
 'osd.cc',
 'osd.h',
 'pan-view.h',
+'pic_equiv.cc',
+'pic_equiv.h',
 'pixbuf-renderer.cc',
 'pixbuf-renderer.h',
 'pixbuf-util.cc',
diff --git a/src/options.cc b/src/options.cc
index 19c8ca0d..95fe86b6 100644
--- a/src/options.cc
+++ b/src/options.cc
@@ -66,6 +66,7 @@ ConfOptions *init_options(ConfOptions *options)
 	options->dnd_icon_size = 48;
 	options->dnd_default_action = DND_ACTION_ASK;
 	options->duplicates_similarity_threshold = 99;
+	options->duplicates_program = g_strdup("echo");
 	options->rot_invariant_sim = TRUE;
 	options->sort_totals = FALSE;
 	options->rectangle_draw_aspect_ratio = RECTANGLE_DRAW_ASPECT_RATIO_NONE;
diff --git a/src/options.h b/src/options.h
index 6fdd37c7..bc41241b 100644
--- a/src/options.h
+++ b/src/options.h
@@ -79,6 +79,7 @@ struct ConfOptions
 
 	guint duplicates_similarity_threshold;
 	guint duplicates_match;
+	gchar *duplicates_program;
 	gboolean duplicates_thumbnails;
 	guint duplicates_select_type;
 	gboolean rot_invariant_sim;
diff --git a/src/pic_equiv.cc b/src/pic_equiv.cc
new file mode 100644
index 00000000..d5149ce0
--- /dev/null
+++ b/src/pic_equiv.cc
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2024 The Geeqie Team
+ *
+ * Author: Marcin Owsiany
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ *
+ * Helper class for computing equivalence sets of pictures.
+ *
+ */
+
+#include "pic_equiv.h"
+
+#include <cstdio>
+
+#include <gdk-pixbuf/gdk-pixbuf.h>
+
+/**
+ * @param cname path to picture file to represent
+ */
+pic_equiv::pic_equiv(char const *cname) : name(cname), equivalent{name}, sim(load_image_sim(cname)) {}
+
+/**
+ * @brief loads a picture file and computes its similarity data
+ * @param cname path to the picture file
+ * @returns the similarity data, or nullptr if the file could not be read
+ */
+ImageSimilarityData *pic_equiv::load_image_sim(const char *cname)
+{
+	g_autoptr(GError) error = nullptr;
+	g_autoptr(GdkPixbuf) buf = gdk_pixbuf_new_from_file(cname, &error);
+	if (!buf)
+	{
+		fprintf(stderr, "Unable to read file %s: %s\n", cname, error ? error->message : "unknown error");
+		return nullptr;
+	}
+	return image_sim_new_from_pixbuf(buf);
+}
+
+pic_equiv::~pic_equiv()
+{
+	if (sim != nullptr)
+	{
+		image_sim_free(sim);
+	}
+}
+
+/**
+ * @brief orders two pic_equiv objects, according to the paths they represent
+ */
+bool operator<(const pic_equiv &a, const pic_equiv &b)
+{
+	return a.name < b.name;
+}
+
+/**
+ * @brief compares this pic_equiv object to another
+ * @param other object to compare to
+ * @returns a number between 0 and 100 which denotes visual similarity
+ */
+gdouble pic_equiv::compare(const pic_equiv &other) const
+{
+	if (sim == nullptr || other.sim == nullptr)
+	{
+		return 0.0;
+	}
+	return 100.0 * image_sim_compare(sim, other.sim);
+}
diff --git a/src/pic_equiv.h b/src/pic_equiv.h
new file mode 100644
index 00000000..473d06ff
--- /dev/null
+++ b/src/pic_equiv.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2024 The Geeqie Team
+ *
+ * Author: Marcin Owsiany
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ *
+ * Helper class for computing equivalence sets of pictures.
+ *
+ */
+
+#ifndef PIC_EQUIV_H
+#define PIC_EQUIV_H
+
+#include <set>
+#include <string>
+
+#include <glib.h>
+
+#include "similar.h"
+
+/**
+ * @class pic_equiv
+ * @brief holds a picture's similarity data, as well as its equivalence set, and allows comparing pictures.
+ */
+class pic_equiv {
+public:
+	explicit pic_equiv(char const *cname);
+	~pic_equiv();
+	pic_equiv(const pic_equiv& other) = delete;
+	pic_equiv& operator=(const pic_equiv& other) = delete;
+	gdouble compare(const pic_equiv& other) const;
+	// Declaration order matters: the constructor initializes equivalent from name.
+	std::string name;
+	std::set<std::string> equivalent;
+private:
+	ImageSimilarityData *sim;
+	static ImageSimilarityData *load_image_sim(const char *cname);
+	friend bool operator<(const pic_equiv &a, const pic_equiv &b);
+};
+
+#endif