diff --git a/chrome/content/duplicateContactsManager.js b/chrome/content/duplicateContactsManager.js
deleted file mode 100644
index f680f20..0000000
--- a/chrome/content/duplicateContactsManager.js
+++ /dev/null
@@ -1,11 +0,0 @@
-var DuplicateContactsManager = {
- manageDuplicatesIsRunning: false,
- menuButtonAction: function() {
- this.manageDuplicates();
- },
- manageDuplicates: function() {
- this.manageDuplicatesIsRunning = true;
- var dupwindow = window.open('chrome://duplicatecontactsmanager/content/duplicateEntriesWindow.xul', "Duplicate Contacts Manager", "chrome,centerscreen");
- dupwindow.focus();
- }
-}
diff --git a/chrome/content/duplicateEntriesWindow.js b/chrome/content/duplicateEntriesWindow.js
index 290009c..a5c68e6 100644
--- a/chrome/content/duplicateEntriesWindow.js
+++ b/chrome/content/duplicateEntriesWindow.js
@@ -4,6 +4,13 @@
// This file includes UTF-8 encoding. Please make sure your text editor can deal with this prior to saving any changes!
/* Change history:
+ * Version 1.0.4:
+ * - various small UI improvements: indication for card matching, layout, language, doc
+ * Version 1.0.3:
+ * - fixed syntax error in de-DE locale that led to obscure initialization error
+ * - minor improvements of localization in the extension and of the entry in the TB add-ons list
+ * Version 1.0.1 and 1.0.2:
+ * - improved label of DCM menu entry for address book window
* Version 1.0:
* - major speedup in particular when searching for duplicates in large address books
* - improved user guidance; new Tools menu entry with default address book selection
diff --git a/doc/DuplicateContactsManager-1.png b/doc/DuplicateContactsManager-1.png
new file mode 100644
index 0000000..dd295ad
Binary files /dev/null and b/doc/DuplicateContactsManager-1.png differ
diff --git a/doc/DuplicateContactsManager-2.png b/doc/DuplicateContactsManager-2.png
new file mode 100644
index 0000000..2429b49
Binary files /dev/null and b/doc/DuplicateContactsManager-2.png differ
diff --git a/doc/DuplicateContactsManager-3.png b/doc/DuplicateContactsManager-3.png
new file mode 100644
index 0000000..2836eb8
Binary files /dev/null and b/doc/DuplicateContactsManager-3.png differ
diff --git a/doc/Profile-Photo.png b/doc/Profile-Photo.png
new file mode 100644
index 0000000..6cfecee
Binary files /dev/null and b/doc/Profile-Photo.png differ
diff --git a/doc/index-simple.html b/doc/index-simple.html
new file mode 100644
index 0000000..bc39f3a
--- /dev/null
+++ b/doc/index-simple.html
@@ -0,0 +1,175 @@
+
+
+Duplicate Contacts Manager for Thunderbird by David von Oheimb
+
+
+
+
Duplicate Contacts Manager for Thunderbird
+
+This Thunderbird extension facilitates handling of redundant entries in address books.
+After installation, it can be invoked via the `Tools->Duplicate Contacts Manager...` menu entry.
+One can also customize the Toolbar of the Address Book window with a Find Duplicates button.
+
+The Duplicate Contacts Manager searches address books for matching contact entries,
+also known as cards.
+It can automatically delete all cards that match and have equivalent or less information than some other one.
+Any remaining pairs of matching cards
+are presented to the user as potential duplicates for manual treatment.
+Each two cards are shown side-by-side with a comparison of all fields containing data, including any photo.
+Some important fields are always shown such that they can be filled in when they have been empty so far.
+
+When pairs of candidate duplicates are presented, the reason why they are considered matching is given in the status line.
+
+
The '≡' symbol is shown between fields with identical values.
+All other relations are determined after abstraction of values (see the definitions below).
+
The '≃' symbol is used for indicating matching names, email addresses, or phone numbers.
+
The '≅' symbol is used for indicating equivalent fields and equivalent cards.
+
The '⋦' and '⋧' symbols indicate that a field or a card contains less/more information than the other.
+
The '⊆' and '⊇' symbols indicate the subset/superset relation on email addresses or phone numbers.
+
The '<' and '>' symbols indicate comparison on numerical values or the substring/superstring relation on names and other texts.
+
+
+During manual treatment of a pair of matching cards the user can skip them, can modify one or both of them, and can decide to delete one of them. When a card is deleted and it has a primary email address that is contained in one or more mailing lists and the other card does not have the same primary email address, the address is also deleted from the respective mailing lists.
+
+There are two search modes for finding matching cards:
+
+
within a single address book with n cards, comparing each card with all other cards,
+ resulting in n*(n-1)/2 card comparisons.
+
with two different address books with n and m cards, comparing each card in the first one
+ with each card of the second one, resulting in n*m card comparisons.
+
+
+Two cards are considered matching if any of the following conditions hold, where the details are explained below.
+
+
The cards contain matching names, or
+
they contain matching email addresses, or
+
they contain matching phone numbers, or
+
both cards do not contain any name, email address, or phone number that might match.
+
+Yet cards with non-equivalent AIMScreenName are never considered matching,
+which is convenient for preventing cards from being repeatedly presented for manual treatment.
+
+The matching relation is designed to be rather weak, such that it tends to yield more pairs of candidate duplicates.
+
+Matching of names, email addresses, and phone numbers is based upon
+equivalence of fields modulo abstraction, described below.
+As a result, for example, names differing only in letter case are considered to match.
+For the matching process, names are completed and their order is normalized — for example, if two name parts are detected in the DisplayName
+(e.g., "John Doe") or in an email address (e.g., "John.Doe@company.com"), they are taken as first and last name.
+Both multiple email addresses within a card and multiple phone numbers within a card
+are treated as sets, i.e., their order is ignored as well as their types.
+
+
Two cards are considered to have matching names if
+
+
their DisplayName is not empty and is equivalent, or
+
both their FirstName and their LastName are not empty and are pairwise equivalent, or
+
their DisplayName is empty but their FirstName and LastName are not empty and are equivalent, or
+
in one card the DisplayName is empty and either the FirstName or LastName is not empty and is equivalent to the DisplayName of the other card, or
+
their AIMScreenName is not empty and is equivalent.
+
+
Two cards are considered to contain matching email addresses if any of their PrimaryEmail or SecondEmail are equivalent.
+
Two cards are considered to contain matching phone numbers if any of their CellularNumber, WorkPhone, or PagerNumber are equivalent. The HomePhone and FaxNumber fields are not considered for matching because such numbers are often shared by a group of people.
+
+
+Before card fields are compared their values are abstracted using the following steps.
+
+
Pruning, which removes stray contents irrelevant for comparison:
+
+
ignore values of certain field types — the set of ignored fields is configurable
+
+ with the default being UID, UUID, CardUID, groupDavKey, groupDavVersion, groupDavVersionPrev, RecordKey, DbRowID,
+ PhotoType, PhotoName, LowercasePrimaryEmail, LowercaseSecondEmail, unprocessed:rev, unprocessed:x-ablabel,
+
remove leading/trailing/multiple whitespace and strip non-digit characters from phone numbers,
+
strip any stray email address duplicates from names, which get inserted by some email clients as default names, and
+
replace @googlemail.com by @gmail.com in email addresses.
+
+
Transformation, which re-arranges information for better comparison:
+
+
correct the order of first and last name (for instance, re-order "Doe, John"),
+
move middle initials such as "M" from last name to first name, and
+
move name prefixes such as "von" to the last name.
+
+
Normalization, which equalizes representation variants:
+
+
convert to lowercase (except for name part of AOL email addresses),
+
convert texts by transcribing umlauts and ligatures, and
+
if configured, replace in phone numbers the national trunk prefix (such as '0')
+ with the default country calling code (such as '+49').
+
+
Simplification, which strips less relevant information from texts:
+
+
remove accents and punctuation, and
+
remove singleton digits and letters (such as initials).
+
+
+Corresponding fields in two cards are considered equivalent if their abstracted values are equal.
+
+Note that the value adaptations mentioned above are computed only for the comparison, i.e., they do not change the actual card fields.
+
+If automatic removal is chosen, only cards preferred for deletion (which implies equivalent or less information than some other card; for details see below) are removed.
+When a pair of matching cards is presented for manual inspection, the card flagged by default with red color for removal is
+
+
the one preferred for deletion, or else (i.e., if the cards are not comparable):
+
the one used less frequently (i.e., having a smaller PopularityIndex), or else
+
the one modified/created earlier (i.e., having a smaller LastModifiedDate), or else
+
the one found in the second address book or the one found later in case the two address books are the same.
+
+
+A card is considered to have equivalent or less information than another if for each non-ignored field:
+
+
the field is equivalent to the corresponding field of the other card, or else
+
it is a set and its value is a subset of the corresponding field value of the other card, or
+
it is the FirstName, LastName, or DisplayName and its value is a substring of the corresponding field value of the other card, or
+
it is the PopularityIndex or LastModifiedDate (which are ignored here), or
+
it has the default value, i.e., it is empty for text fields or its value is 0 for number fields or false for Boolean fields.
+
+For the above field-wise comparison, the email addresses of a card are treated as a set,
+the phone numbers of a card are also treated as a set, and
+the set of names of mailing lists a card belongs to is taken as an additional field.
+
+A card with equivalent or less information than another is preferred for deletion if:
+
+
not all non-ignored fields are equivalent (which implies that it has less information), or else
+
the character weight of the card is smaller, i.e.,
+ its pruned and transformed (non-ignored) field values have an equal or smaller total number of uppercase letters and special characters than the other card, or else the character weight is equal and
+
its PopularityIndex is smaller, or else
+
its LastModifiedDate is smaller.
+
+
+Here is an example.
+The card on the right will be preferred for deletion because it contains less information.
+
+
+ NickName: ........... "Péte" .............................. " pete ! " .................... accent, punctuation, letter case, and whitespace ignored
+
+ FirstName: .......... "Peter" ............................. "Peter Y van" ............ name prefix "van" moved to last name, middle initial "Y" ignored
+
+ LastName: ........... "van Müller" .................. "Mueller" .................... name prefix "van" moved to last name, umlauts transcribed
+
+ DisplayName: .... "Hans Peter van Müller" .. "van Müller, Peter" .. first name moved to the front, name is substring
+
+ PreferDisplayName: .. 'yes' ........................... 'yes' ............................ same truth value
+
+ AimScreenName: ...... "" ................................. "" .................................. same AIM name
+
+ PreferMailFormat: ... 'HTML' ......................... 'unknown' .............. default ('unknown') considered less information
+
+ PrimaryEmail: .... "Peter.vanMueller@company.com" .. "P.van.Mueller@gmx.de" .. emails treated as sets, letter case ignored
+
+ SecondaryEmail: .... "p.van.mueller@gmx.de" .. "" ......................... emails treated as sets, letter case ignored
+
+ WorkPhone: ............. "089/1234-5678" ........ "+49 89 12345678" ... national prefix normalized and non-digits ignored
+
+ PopularityIndex: .... 5 ........................................ 3 ................................... field ignored for information comparison
+
+ LastModifiedDate: .. 2018-02-25 07:51:28 .. 2018-02-25 08:30:37 .. field ignored for information comparison
+
+ UUID: ......................... "" ....................... "903a61be-64d5-4844-802a" ... field ignored
+
+
+Technical information: The options/configuration/preferences used by this Thunderbird extension are saved in configuration keys starting with `extensions.DuplicateContactsManager.` - for instance, the list of ignored fields is stored in the variable ignoreFields.
+
+
+
+Last modified: Mon Jun 4 00:11:04 CEST 2018
+
diff --git a/doc/index.html b/doc/index.html
new file mode 100644
index 0000000..8c455f8
--- /dev/null
+++ b/doc/index.html
@@ -0,0 +1,315 @@
+
+
+
+
+
+
+
+
+ Duplicate Contacts Manager for Thunderbird by David von Oheimb
+
+
+
+
+
+
+
+
+
+
+This Thunderbird extension facilitates handling of redundant entries in address books.
+After installation it can be invoked via the Tools->Duplicate Contacts Manager... menu entry.
+One can also customize the Toolbar of the Address Book window with a Find Duplicates button.
+
+
+
+
+
+The Duplicate Contacts Manager searches address books for matching contact entries, also known as cards.
+It can automatically delete all cards that match and have equivalent or less information than some other one.
+Any remaining pairs of matching cards
+are presented to the user as potential duplicates for manual treatment.
+Each two cards are shown side-by-side with a comparison of all fields containing data, including any photo.
+Some important fields are always shown such that they can be filled in when they have been empty so far.
+
+When pairs of candidate duplicates are presented, the reason why they are considered matching is given in the status line.
+
+
+
The '≡' symbol is shown between fields with identical values.
+All other relations are determined after abstraction of values (see the definitions below).
+
The '≃' symbol is used for indicating matching names, email addresses, or phone numbers.
+
The '≅' symbol is used for indicating equivalent fields and equivalent cards.
+
The '⋦' and '⋧' symbols indicate that a field or a whole card contains less/more information than the other.
+
The '⊆' and '⊇' symbols indicate the subset/superset relation on email addresses or phone numbers.
+
The '<' and '>' symbols indicate comparison on numerical values or the substring/superstring relation on names and other texts.
+
+
+During manual treatment of a pair of matching cards the user can skip them, can modify one or both of them, and can decide to delete one of them.
+When a card is deleted and it has a primary email address that is contained in one or more mailing lists and the other card does not have the same primary email address, the address is also deleted from the respective mailing lists.
+
+
Matching contact entries
+
+There are two search modes for finding matching cards:
+
+
within a single address book with n cards, comparing each card with all other cards,
+ resulting in n*(n-1)/2 card comparisons.
+
with two different address books with n and m cards, comparing each card in the first one
+ with each card of the second one, resulting in n*m card comparisons.
+
+
+
+Two cards are considered matching if any of the following conditions hold, where the details are explained below.
+
+
The cards contain matching names, or
+
they contain matching email addresses, or
+
they contain matching phone numbers, or
+
both cards do not contain any name, email address, or phone number that might match.
+
+Yet cards with non-equivalent AIMScreenName are never considered matching,
+which is convenient for preventing cards from being repeatedly presented for manual treatment.
+
+
+The matching relation is designed to be rather weak, such that it tends to yield more pairs of candidate duplicates.
+
+
+Matching of names, email addresses, and phone numbers is based upon equivalence of fields modulo abstraction, described below.
+As a result, for example, names differing only in letter case are considered to match.
+For the matching process, names are completed and their order is normalized —
+for example, if two name parts are detected in the DisplayName (e.g., "John Doe")
+or in an email address (e.g., "John.Doe@company.com"), they are taken as first and last name.
+Both multiple email addresses within a card and multiple phone numbers within a card
+are treated as sets, i.e., their order is ignored as well as their types.
+
+
Two cards are considered to have matching names if
+
+
their DisplayName is not empty and is equivalent, or
+
both their FirstName and their LastName are not empty and are pairwise equivalent, or
+
their DisplayName is empty but their FirstName and LastName are not empty and are pairwise equivalent, or
+
in one card the DisplayName is empty and either the FirstName or LastName is not empty and is equivalent to the DisplayName of the other card, or
+
their AIMScreenNames are not empty and are equivalent.
+
+
Two cards are considered to contain matching email addresses if any of their PrimaryEmail or SecondEmail are equivalent.
+
Two cards are considered to contain matching phone numbers if any of their CellularNumber, WorkPhone, or PagerNumber are equivalent. The HomePhone and FaxNumber fields are not considered for matching because such numbers are often shared by a group of people.
+
+
+
Abstraction of field values
+
+Before card fields are compared their values are abstracted using the following steps.
+
+
Pruning, which removes stray contents irrelevant for comparison:
+
+
ignore values of certain field types — the set of ignored fields is configurable
+
+ with the default being UID, UUID, CardUID, groupDavKey, groupDavVersion, groupDavVersionPrev, RecordKey, DbRowID,
+ PhotoType, PhotoName, LowercasePrimaryEmail, LowercaseSecondEmail, unprocessed:rev, unprocessed:x-ablabel,
+
remove leading/trailing/multiple whitespace and strip non-digit characters from phone numbers,
+
strip any stray email address duplicates from names, which get inserted by some email clients as default names, and
+
replace @googlemail.com by @gmail.com in email addresses.
+
+
Transformation, which re-arranges information for better comparison:
+
+
correct the order of first and last name (for instance, re-order "Doe, John"),
+
move middle initials such as "M" from last name to first name, and
+
move name prefixes such as "von" to the last name.
+
+
Normalization, which equalizes representation variants:
+
+
convert to lowercase (except for name part of AOL email addresses),
+
convert texts by transcribing umlauts and ligatures, and
+
if configured, replace in phone numbers the national trunk prefix (such as '0')
+ with the default country calling code (such as '+49').
+
+
Simplification, which strips less relevant information from texts:
+
+
remove accents and punctuation, and
+
remove singleton digits and letters (such as initials).
+
+
+Corresponding fields in two cards are considered equivalent if their abstracted values are equal.
+
+Note that the value adaptations mentioned above are computed only for the comparison, i.e., they do not change the actual card fields.
+
+
+If automatic removal is chosen, only cards preferred for deletion (which implies equivalent or less information than some other card; for details see below) are removed.
+When a pair of matching cards is presented for manual inspection, the card flagged by default with red color for removal is
+
+
the one preferred for deletion, or else (i.e., if the cards are not comparable):
+
the one used less frequently (i.e., having a smaller PopularityIndex), or else
+
the one modified/created earlier (i.e., having a smaller LastModifiedDate), or else
+
the one found in the second address book or the one found later in case the two address books are the same.
+
+
+
Equivalence of information
+
+A card is considered to have equivalent or less information than another if for each non-ignored field:
+
+
the field is equivalent to the corresponding field of the other card, or else
+
it is a set and its value is a subset of the corresponding field value of the other card, or
+
it is the FirstName, LastName, or DisplayName and its value is a substring of the corresponding field value of the other card, or
+
it is the PopularityIndex or LastModifiedDate (which are ignored here), or
+
it has the default value, i.e., it is empty for text fields or its value is 0 for number fields or false for Boolean fields.
+
+For the above field-wise comparison, the email addresses of a card are treated as a set,
+the phone numbers of a card are also treated as a set, and
+the set of names of mailing lists a card belongs to is taken as an additional field.
+
+
+A card with equivalent or less information than another is preferred for deletion if:
+
+
not all non-ignored fields are equivalent (which implies that it has less information), or else
+
the character weight of the card is smaller, i.e.,
+ its pruned and transformed (non-ignored) field values have an equal or smaller total number of uppercase letters and special characters than the other card, or else the character weight is equal and
+
its PopularityIndex is smaller, or else
+
its LastModifiedDate is smaller.
+
+
+
+Here is an example.
+
+
+
+The card on the right will be preferred for deletion because it contains less information.
+
+
+
NickName:
"Péte"
" pete ! "
accent, punctuation, letter case, and whitespace ignored
+
+
FirstName:
"Peter"
"Peter Y van"
name prefix "van" moved to last name, middle initial "Y" ignored
+
+
LastName:
"van Müller"
"Mueller"
name prefix "van" moved to last name, umlauts transcribed
+
+
DisplayName:
"Hans Peter van Müller"
"van Müller, Peter"
first name moved to the front, name is substring
+
+
PreferDisplayName:
'yes'
'yes'
same truth value
+
+
AimScreenName:
""
""
same AIM name
+
+
PreferMailFormat:
'HTML'
'unknown'
default ('unknown') considered less information
+
+
PrimaryEmail:
"Peter.vanMueller@company.com"
"P.van.Mueller@gmx.de"
emails treated as sets, letter case ignored
+
+
SecondaryEmail:
"p.van.mueller@gmx.de"
""
emails treated as sets, letter case ignored
+
+
WorkPhone:
"089/1234-5678"
"+49 89 12345678"
national prefix normalized and non-digits ignored
+
+
PopularityIndex:
5
3
field ignored for information comparison
+
+
LastModifiedDate:
2018-02-25 07:51:28
2018-02-25 08:30:37
field ignored for information comparison
+
+
UUID:
""
"903a61be-64d5-4844-802a"
field ignored
+
+
+
+
+
+
Configuration variables
+
+The options/configuration/preferences used by this Thunderbird extension are saved in configuration keys starting with extensions.DuplicateContactsManager. —
+for instance, the list of ignored fields is stored in the variable ignoreFields.
+
+
+
Work on this extension apparently has been stopped by end of 2012.
+ Meanwhile, mixed user experience piled up on the official Thunderbird add-on feedback page.
+
+ Recently I faced a major challenge: my address book with some 1.200 entries
+ got inflated by a buggy CardDAV online sync tool to more than 17.000 cards.
+ The new copies contained new types of automatically generated identification meta fields.
+ When I tried to clean the mess automatically using Duplicate Contacts Manager,
+ this did not work because it considered the copies different due to the new identifiers.
+ So I added to the extension a configurable list of field types ignored during comparison.
+ Doing so, I started fixing several issues and adding further features:
+
+
+
major speedup in particular when searching for duplicates in large address books
+
improved user guidance; new Tools menu entry with default address book selection
+
various improvements of content matching and card comparison for equivalence
+
cards may be excluded from being shown as matching by setting a different AIM name
+
photos are compared for equality and are shown during manual inspection
+
mailing list membership is taken into account for comparison and shown during inspection
+
during manual inspection, field-by-field (resp. set-by-set) comparison information is shown
+
option to consider phone numbers with national prefix and with default country code equivalent
+
option to customize list of ignored fields; output summary of different fields
+
option to preserve entries of first address book when auto-deleting redundant entries
+
options are saved in TB configuration/preferences at extensions.DuplicateContactsManager.*
+
TODO: add option to prune and transform contents of individual or all cards
+
TODO: add option to automatically and/or manually merge fields (e.g., buttons with arrow)
+
TODO: check and improve French and Spanish translations of new texts
+
+
+
Part of the original post of 2012-04-07, introducing version 0.9:
+
+The so far available Version 0.8.2 was a good starting point,
+but since I urgently needed a more sophisticated tool, I started improving it myself.
+My changes have been motivated — and validated — using my personal
+address book with some pretty diligently manually managed 1.000 entries and using
+the automatically generated collected address book with some 2.500 entries
+including many duplicates and weird variants of names etc.
+The change log is:
+
+
fields of retained duplicate entry can be edited
+
can compare across two different address books
+
new option to first collect all duplicates and then handle them
+
card matching is less aggressive and more fault tolerant:
+ only very likely duplicates — and more of these — are presented
+
the less complete duplicate is selected for removal by default
+
automatic removal option now removes also less complete duplicates
+
made the overall search process interruptible and repeatable
+
moved Thunderbird menu entry into Address Book → Tools
+
many other small improvements, e.g., on progress bar and final info
+
internal: major code cleanup (would be still a lot TODO)
+
TODO for others: check/update/improve French and Spanish translations
+
TODO for others: update online documentation using the above text
+