From 91269dd5cbab008474ed44aa55624a5b52604d46 Mon Sep 17 00:00:00 2001
From: Nigel Horne
Date: Wed, 18 Dec 2024 20:15:54 -0500
Subject: [PATCH] Added bin/find_dups

---
 bin/find_dups | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100755 bin/find_dups

diff --git a/bin/find_dups b/bin/find_dups
new file mode 100755
index 0000000..d2ce329
--- /dev/null
+++ b/bin/find_dups
@@ -0,0 +1,53 @@
+#!/usr/bin/env perl
+
+# Find duplicate people in a gedcom file.
+# Usage: find_dups 'Home Person' 'gedcom-file'
+
+use strict;
+use warnings;
+use autodie qw(:all);
+
+die "Usage: $0 'Home Person' gedcom-file\n" unless(scalar(@ARGV) == 2);
+
+# Read CSV output from the gedcom client.
+# The list form of open avoids shell quoting problems with names and paths;
+# '../gedcom' assumes this script is run from within bin/
+open(my $fh, '-|', '../gedcom', '-AXlh', $ARGV[0], $ARGV[1]);
+
+# Discard the header line
+my $header = <$fh>;
+
+# Store entries in a hash to detect duplicates
+my %seen;
+my @duplicates;
+
+while(my $line = <$fh>) {
+	chomp $line;
+
+	# Split the line into fields - a plain comma split assumes no
+	# field contains an embedded comma (switch to Text::CSV if it can)
+	my ($xref, $given_names, $surname, $dob, $dod, $relationship) = split(/,/, $line);
+
+	# Only consider people with at least one date, otherwise different
+	# people who happen to share a name would be reported as duplicates
+	if($dob || $dod) {
+		# Create a unique key from the identifying columns
+		my $key = join('|', $given_names, $surname, $dob || $dod);
+
+		# Check if the key has already been seen
+		if(exists $seen{$key}) {
+			push @duplicates, $line;
+		} else {
+			$seen{$key} = 1;
+		}
+	}
+}
+
+close $fh;
+
+# Output duplicate entries
+if(scalar(@duplicates)) {
+	print "Duplicate entries found:\n", join("\n", @duplicates), "\n";
+} else {
+	print "No duplicate entries found.\n";
+}
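
A quick sanity check of the script above, on hypothetical data: the tree
"family.ged", the home person 'John Smith' and the reported row are all made
up here, and the row's columns simply mirror what the script expects from
"gedcom -AXlh" (xref, given names, surname, date of birth, date of death,
relationship). Because only the second and later occurrence of each
name/date key is pushed onto @duplicates, each reported line has an earlier
partner somewhere in the full listing (here, an earlier James Smith born
1842):

    $ cd bin
    $ ./find_dups 'John Smith' family.ged
    Duplicate entries found:
    I0137,James,Smith,1842,,3rd cousin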