From 0f0d8425b96ce4bc92377b6ee135d3f1e6d0b2b3 Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Tue, 7 Nov 2023 14:12:58 +0100 Subject: [PATCH 01/18] Fix compute_rdf_aggregates without names --- utils/compute_rdf_aggregates.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/compute_rdf_aggregates.py b/utils/compute_rdf_aggregates.py index 53e37ced..12b4b5ee 100644 --- a/utils/compute_rdf_aggregates.py +++ b/utils/compute_rdf_aggregates.py @@ -394,6 +394,8 @@ def compute_rdfs( if args.name_selections and (len(args.selections) != len(args.name_selections)): raise AssertionError("the number of selections must be equal to the number of names") + elif not args.name_selections: + args.name_selections = args.selections topol = process_topology(args.topol) From 21afffb6fd57d54e8ba42ae0b07e69b893e9f3c1 Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Fri, 29 Dec 2023 09:51:01 +0100 Subject: [PATCH 02/18] Fix pddf --- utils/compute_rdf_aggregates.py | 47 ++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/utils/compute_rdf_aggregates.py b/utils/compute_rdf_aggregates.py index 12b4b5ee..0eeaa4ab 100644 --- a/utils/compute_rdf_aggregates.py +++ b/utils/compute_rdf_aggregates.py @@ -1,5 +1,6 @@ import MDAnalysis as mda from MDAnalysis.lib import distances +from scipy.spatial.distance import squareform import numpy as np import matplotlib.pyplot as plt import os @@ -184,13 +185,41 @@ def compute_rdfs( result=cond_distmat, backend="OpenMP", ) + # sqdistmat = squareform(cond_distmat) - pddf_i, pddf_edges = np.histogram( - cond_distmat, - bins=nbins, - range=(0, rmax), - density=False, - ) + # load the toml file with the form factors + with open(compute_pddf, "rb") as f: + form_factors = tomli.load(f) + + # Create a 2D array of form factors for all pairs of atoms + form_factors_array = np.array([form_factors[atom.residue.resname][atom.name][0] for atom in agg_sel]) + + # Compute the weights for all pairs of atoms + weights_matrix = np.outer(form_factors_array, form_factors_array) + + # Get the upper triangular part of the weights matrix + weights = weights_matrix[np.triu_indices(n, k=1)] + + # Compute the histogram + pddf_i, pddf_edges = np.histogram(cond_distmat, bins=nbins, range=(0, rmax), weights=weights, density=False) + + # deltaR = rmax / nbins + # pddf_i = np.zeros((nbins,), dtype=np.float64) + # pddf_dists = np.linspace(deltaR/2.0, rmax - deltaR/2.0, nbins) + # for i in range(n): + # for j in range(i + 1, n): + # if sqdistmat[i][j] <= rmax: + # bin_distance = np.floor(sqdistmat[i][j]/deltaR) + + # pddf_i[int(bin_distance)] += form_factors[agg_sel[i].residue.resname][agg_sel[i].name][0] * form_factors[agg_sel[j].residue.resname][agg_sel[j].name][0] + + + # pddf_i, pddf_edges = np.histogram( + # cond_distmat, + # bins=nbins, + # range=(0, rmax), + # density=False, + # ) pddf += pddf_i @@ -382,9 +411,9 @@ def compute_rdfs( ) parser.add_argument( "--compute-pddf", - action="store_true", - default=False, - help="compute the pair distance distribution function (PDDF)", + type=str, + default=None, + help="compute the pair distance distribution function (PDDF) - needs a form factor .toml file", ) args = parser.parse_args() From f8fc206d65721a327b4f17d01ddbdc567ec6080f Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Fri, 29 Dec 2023 11:20:14 +0100 Subject: [PATCH 03/18] Changed PDDF to be computed using PLUMED parameters. 
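For reference, the parameters file read here follows the PLUMED style: one
PARAMETERS=... entry per bead, in the same order the beads appear in the
trajectory (values are looked up by atom index), and only the first
comma-separated value is kept. A minimal sketch with a hypothetical file;
note that line.split("=")[1] assumes exactly one "=" per line:

    # params.dat (hypothetical):
    #   PARAMETERS=9.45,2.31
    #   PARAMETERS=8.10,1.77
    form_factors = parse_parametersfile("params.dat")
    # -> [9.45, 8.1]
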
--- utils/compute_rdf_aggregates.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/utils/compute_rdf_aggregates.py b/utils/compute_rdf_aggregates.py index 0eeaa4ab..b07f66b6 100644 --- a/utils/compute_rdf_aggregates.py +++ b/utils/compute_rdf_aggregates.py @@ -37,6 +37,16 @@ def parse_masses(fname): return masses +def parse_parametersfile(fname): + params = [] + with open(fname, "r") as f: + for line in f: + if "PARAMETERS" in line: + right_line = line.split("=")[1].strip() + params.append(float(right_line.split(",")[0])) # get only the first parameter even for SAXS + return params + + def compute_rdfs( topol, pdbdir, @@ -64,6 +74,9 @@ def compute_rdfs( Rg = [] if compute_pddf: pddf = np.zeros(nbins) + + # read the form factors / scattering lengths + form_factors = parse_parametersfile(compute_pddf) nsnaps = 0 for snapshot in tqdm(glob(os.path.join(pdbdir, "*.pdb"))): @@ -187,12 +200,8 @@ def compute_rdfs( ) # sqdistmat = squareform(cond_distmat) - # load the toml file with the form factors - with open(compute_pddf, "rb") as f: - form_factors = tomli.load(f) - # Create a 2D array of form factors for all pairs of atoms - form_factors_array = np.array([form_factors[atom.residue.resname][atom.name][0] for atom in agg_sel]) + form_factors_array = np.array([form_factors[atom.index] for atom in agg_sel]) # Compute the weights for all pairs of atoms weights_matrix = np.outer(form_factors_array, form_factors_array) From 91d887c6e45171a2f9eabeacd93dc168efce00a4 Mon Sep 17 00:00:00 2001 From: Manuel Carrer Date: Fri, 2 Feb 2024 23:01:13 +0100 Subject: [PATCH 04/18] Add name selection in center_group.py --- utils/center_group.py | 190 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 157 insertions(+), 33 deletions(-) diff --git a/utils/center_group.py b/utils/center_group.py index b3cf7555..60479201 100644 --- a/utils/center_group.py +++ b/utils/center_group.py @@ -1,8 +1,10 @@ -import os import argparse +import os +import re +import sys + import h5py import numpy as np -import re # based on https://stackoverflow.com/a/6512463/3254658 @@ -22,6 +24,19 @@ def parse_bead_list(string): ) return list(range(start, end + 1)) +def parse_name_list(selection, h5md_file): + f_in = h5py.File(h5md_file, "r") + names = f_in["parameters/vmd_structure/name"][:] + species = f_in["particles/all/species"][:] + name_to_species = {n: s for s, n in enumerate(names)} + + name_list = np.empty(0, dtype=np.int32) + for name in selection: + name_list = np.append(name_list, np.where(species == name_to_species[np.bytes_(name)])) + + f_in.close() + return name_list + def get_centers(positions, box, nrefs=3): centers = np.empty((0, positions.shape[2])) @@ -54,23 +69,7 @@ def get_centers(positions, box, nrefs=3): return centers -def center_trajectory( - h5md_file, bead_list, nrefbeads, overwrite=False, out_path=None, center_last=False -): - if out_path is None: - out_path = os.path.join( - os.path.abspath(os.path.dirname(h5md_file)), - os.path.splitext(os.path.split(h5md_file)[-1])[0] - + "_new" - + os.path.splitext(os.path.split(h5md_file)[-1])[1], - ) - if os.path.exists(out_path) and not overwrite: - error_str = ( - f"The specified output file {out_path} already exists. " - f'use overwrite=True ("-f" flag) to overwrite.' 
- ) - raise FileExistsError(error_str) - +def center_trajectory_mic(h5md_file, bead_list, nrefbeads, out_path, center_last=False): f_in = h5py.File(h5md_file, "r") f_out = h5py.File(out_path, "w") @@ -81,7 +80,6 @@ def center_trajectory( f_in.copy(k, f_out) box_size = f_in["particles/all/box/edges/value"][:] - beads_pos = f_in["particles/all/position/value"][:][:, bead_list, :] centers = get_centers(beads_pos, box_size, nrefbeads) @@ -108,6 +106,68 @@ def center_trajectory( f_in.close() f_out.close() +def center_of_mass(pos, n, box): + p_mapped = 2 * np.pi * pos / box + cos_p_mapped = np.cos(p_mapped) + sin_p_mapped = np.sin(p_mapped) + + cos_average = np.sum(cos_p_mapped) / n + sin_average = np.sum(sin_p_mapped) / n + + theta = np.arctan2(-sin_average, -cos_average) + np.pi + return box * theta / (2 * np.pi) + + +def get_centers_com(positions, box_size, axis): + frames = positions.shape[0] + n = positions.shape[1] + centers = np.zeros((frames, 3)) + for frame in range(frames): + centers[frame, axis] = center_of_mass(positions[frame, :, axis], n, box_size[frame, axis]) + return centers + + +def center_trajectory_com(h5md_file, bead_list, axis, out_path): + f_in = h5py.File(h5md_file, "r") + f_out = h5py.File(out_path, "w") + + for k, v in f_in.attrs.items(): + f_out.attrs[k] = v + + for k in f_in.keys(): + f_in.copy(k, f_out) + f_in.close() + + box_size = f_out["particles/all/box/edges/value"][:] + beads_pos = f_out["particles/all/position/value"][:][:, bead_list, :] + + box_diag = np.diag(box_size[0]) + for frame in range(1, box_size.shape[0]): + box_diag = np.vstack((box_diag, np.diag(box_size[frame]))) + + centers = get_centers_com(beads_pos, box_diag, axis) + + mask = np.eye(1, 3, k=axis) + box_translate = 0.5 * mask * box_diag + + translate = box_translate - centers + tpos = f_out["particles/all/position/value"] + np.repeat( + translate[:, np.newaxis, :], + f_out["particles/all/position/value"].shape[1], + axis=1, + ) + + tpos = np.mod( + tpos[:, :, :], np.repeat(box_diag[:, np.newaxis, :], tpos.shape[1], axis=1) + ) + + f_out["particles/all/position/value"][:] = tpos - np.repeat( + box_translate[:, np.newaxis, :], + f_out["particles/all/position/value"].shape[1], + axis=1, + ) + f_out.close() + if __name__ == "__main__": description = ( @@ -120,9 +180,35 @@ def center_trajectory( "--beads", type=parse_bead_list, nargs="+", - required=True, + default=None, help="bead list to center (e.g.: 1-100 102-150)", ) + parser.add_argument( + "-n", + "--names", + type=str, + nargs="+", + default=None, + help="Names of bead to center (e.g.: C1 N G2)", + ) + parser.add_argument( + "-m", + "--method", + choices=["COM", "MIC"], + type=str, + default=None, + help="Specify the centering method. Available methods:\n" + "COM: center the box around the center of mass of the given groups along a direction (axis)\n" + "MIC: center the box around a centroid of the group that assures the minimal image convention", + ) + parser.add_argument( + "--axis", + type=int, + choices=[0, 1, 2], + default=2, + required='COM' in sys.argv, + help="Direction along which to calculate the center of mass." + ) parser.add_argument( "-o", "--out", @@ -155,17 +241,55 @@ def center_trajectory( ) args = parser.parse_args() - bead_list = [] - for interval in args.beads: - bead_list += interval + if args.beads is None and args.names is None: + error_str = "Either the 'beads' or 'names' variable must be provided." 
+ raise ValueError(error_str) + + if args.out_path is None: + args.out_path = os.path.join( + os.path.abspath(os.path.dirname(args.h5md_file)), + os.path.splitext(os.path.split(args.h5md_file)[-1])[0] + + "_new" + + os.path.splitext(os.path.split(args.h5md_file)[-1])[1], + ) + if os.path.exists(args.out_path) and not args.force: + error_str = ( + f"The specified output file {args.out_path} already exists. " + f'use overwrite=True ("-f" flag) to overwrite.' + ) + raise FileExistsError(error_str) + + if args.beads is not None: + bead_list = [] + for interval in args.beads: + bead_list += interval - bead_list = np.array(sorted(bead_list)) - 1 + bead_list = np.array(sorted(bead_list)) - 1 + + if args.names is not None: + name_list = parse_name_list(args.names, args.h5md_file) + + if args.beads is not None and args.names is not None: + atom_list = np.intersect1d(bead_list, name_list) + elif args.beads is not None: + atom_list = bead_list + else: + atom_list = name_list + + if args.method == "COM": + center_trajectory_com( + args.h5md_file, + atom_list, + args.axis, + args.out_path, + ) + + if args.method == "MIC": + center_trajectory_mic( + args.h5md_file, + atom_list, + nrefbeads=args.nrefs, + out_path=args.out_path, + center_last=args.center_last, + ) - center_trajectory( - args.h5md_file, - bead_list, - nrefbeads=args.nrefs, - overwrite=args.force, - out_path=args.out_path, - center_last=args.center_last, - ) From c9c830e80eab57e1152228b50ea59a8a0c9c58a2 Mon Sep 17 00:00:00 2001 From: Manuel Carrer Date: Sat, 3 Feb 2024 12:52:48 +0100 Subject: [PATCH 05/18] We don't need to shift the COM to zero --- utils/center_group.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/utils/center_group.py b/utils/center_group.py index 60479201..2e34dd95 100644 --- a/utils/center_group.py +++ b/utils/center_group.py @@ -157,15 +157,9 @@ def center_trajectory_com(h5md_file, bead_list, axis, out_path): axis=1, ) - tpos = np.mod( + f_out["particles/all/position/value"][:] = np.mod( tpos[:, :, :], np.repeat(box_diag[:, np.newaxis, :], tpos.shape[1], axis=1) ) - - f_out["particles/all/position/value"][:] = tpos - np.repeat( - box_translate[:, np.newaxis, :], - f_out["particles/all/position/value"].shape[1], - axis=1, - ) f_out.close() From a9ca8ac8b04678f15b0860e430a89ee4c759190b Mon Sep 17 00:00:00 2001 From: Manuel Carrer Date: Sat, 3 Feb 2024 12:59:14 +0100 Subject: [PATCH 06/18] Cleanup --- utils/center_group.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/utils/center_group.py b/utils/center_group.py index 2e34dd95..a7254444 100644 --- a/utils/center_group.py +++ b/utils/center_group.py @@ -148,17 +148,18 @@ def center_trajectory_com(h5md_file, bead_list, axis, out_path): centers = get_centers_com(beads_pos, box_diag, axis) mask = np.eye(1, 3, k=axis) - box_translate = 0.5 * mask * box_diag - - translate = box_translate - centers - tpos = f_out["particles/all/position/value"] + np.repeat( + translate = 0.5 * mask * box_diag - centers + + translations = np.repeat( translate[:, np.newaxis, :], - f_out["particles/all/position/value"].shape[1], + f_in["particles/all/position/value"].shape[1], axis=1, ) + + tpos = f_out["particles/all/position/value"] + translations f_out["particles/all/position/value"][:] = np.mod( - tpos[:, :, :], np.repeat(box_diag[:, np.newaxis, :], tpos.shape[1], axis=1) + tpos, np.repeat(box_diag[:, np.newaxis, :], tpos.shape[1], axis=1) ) f_out.close() From 33c74b96dfab307dd05b6d7fae0eb4f43f57e556 Mon Sep 17 00:00:00 2001 From: 
Manuel Carrer Date: Sun, 4 Feb 2024 17:37:03 +0100 Subject: [PATCH 07/18] Fix wrong variable --- utils/center_group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/center_group.py b/utils/center_group.py index a7254444..2d8bb9d8 100644 --- a/utils/center_group.py +++ b/utils/center_group.py @@ -152,7 +152,7 @@ def center_trajectory_com(h5md_file, bead_list, axis, out_path): translations = np.repeat( translate[:, np.newaxis, :], - f_in["particles/all/position/value"].shape[1], + f_out["particles/all/position/value"].shape[1], axis=1, ) From 1debff6a31e91d23dc9768afb23591dc86801c4c Mon Sep 17 00:00:00 2001 From: Manuel Carrer Date: Sun, 4 Feb 2024 19:17:21 +0100 Subject: [PATCH 08/18] Use np.mean --- utils/center_group.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/utils/center_group.py b/utils/center_group.py index 2d8bb9d8..a8c56696 100644 --- a/utils/center_group.py +++ b/utils/center_group.py @@ -106,13 +106,13 @@ def center_trajectory_mic(h5md_file, bead_list, nrefbeads, out_path, center_last f_in.close() f_out.close() -def center_of_mass(pos, n, box): +def center_of_mass(pos, box): p_mapped = 2 * np.pi * pos / box cos_p_mapped = np.cos(p_mapped) sin_p_mapped = np.sin(p_mapped) - cos_average = np.sum(cos_p_mapped) / n - sin_average = np.sum(sin_p_mapped) / n + cos_average = np.mean(cos_p_mapped) + sin_average = np.mean(sin_p_mapped) theta = np.arctan2(-sin_average, -cos_average) + np.pi return box * theta / (2 * np.pi) @@ -120,10 +120,9 @@ def center_of_mass(pos, n, box): def get_centers_com(positions, box_size, axis): frames = positions.shape[0] - n = positions.shape[1] centers = np.zeros((frames, 3)) for frame in range(frames): - centers[frame, axis] = center_of_mass(positions[frame, :, axis], n, box_size[frame, axis]) + centers[frame, axis] = center_of_mass(positions[frame, :, axis], box_size[frame, axis]) return centers From 78a18a2267e2b9ac369e99c7589a5b590edf849f Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Wed, 28 Feb 2024 16:33:54 +0100 Subject: [PATCH 09/18] Add option to font sizes in aggregates.py --- utils/aggregates.py | 55 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/utils/aggregates.py b/utils/aggregates.py index 596aeecc..0c7964cb 100644 --- a/utils/aggregates.py +++ b/utils/aggregates.py @@ -1,3 +1,5 @@ +# PYTHON_ARGCOMPLETE_OK + import MDAnalysis as mda from MDAnalysis.analysis import distances import numpy as np @@ -6,7 +8,7 @@ from matplotlib.ticker import MaxNLocator import os import sys -import argparse +import argcomplete, argparse from tqdm import tqdm import warnings import dask @@ -134,6 +136,8 @@ def aggregates_clustering( plot_dendrograms, traj_in_memory, save_solvent, + font_size, + caption_font_size, summary_fig_size=(12, 8), ): u = mda.Universe(grofile, h5mdfile, in_memory=traj_in_memory) @@ -236,30 +240,42 @@ def aggregates_clustering( # plot results _, axs = plt.subplots(2, 2, figsize=summary_fig_size) + plt.rcParams.update({"font.size": font_size}) + + axs[0, 0].text(-0.2, 1.05, "(a)", transform=axs[0, 0].transAxes, fontsize=caption_font_size, va='top', ha='right') + axs[0, 1].text(-0.2, 1.05, "(b)", transform=axs[0, 1].transAxes, fontsize=caption_font_size, va='top', ha='right') + axs[1, 0].text(-0.2, 1.05, "(c)", transform=axs[1, 0].transAxes, fontsize=caption_font_size, va='top', ha='right') + axs[1, 1].text(-0.2, 1.05, "(d)", transform=axs[1, 1].transAxes, fontsize=caption_font_size, va='top', ha='right') axs[0, 
0].plot(frames, n_clusters) axs[0, 0].axhline(avg_n_aggs, linestyle="--") - axs[0, 0].set_ylabel("Num. of aggregates") - axs[0, 0].set_xlabel("Frame") + axs[0, 0].set_ylabel("Num. of aggregates", fontsize=font_size) + axs[0, 0].set_xlabel("Frame", fontsize=font_size) + # axs[0, 0].set_ylim([4, 6]) axs[0, 0].yaxis.set_major_locator(MaxNLocator(integer=True)) xticklabels = [f"{sizes[i]}" for i in range(len(sizes))] axs[0, 1].bar(xticklabels, freq, width=0.8) - axs[0, 1].set_ylabel("Avg. num. per snapshot") - axs[0, 1].set_xlabel("Aggregate size") - axs[0, 1].tick_params("x", labelrotation=60) + axs[0, 1].set_ylabel("Avg. num. per snapshot", fontsize=font_size) + axs[0, 1].set_xlabel("Aggregate size", fontsize=font_size) + axs[0, 1].tick_params("x", labelrotation=60, labelsize=font_size) xticklabels = [f"{k}" for k in prob_by_size.keys()] axs[1, 0].bar(xticklabels, prob_by_size.values(), width=0.8) - axs[1, 0].set_ylabel("Prob.") - axs[1, 0].set_xlabel("Aggregate size") - axs[1, 0].tick_params("x", labelrotation=60) + axs[1, 0].set_ylabel("Prob.", fontsize=font_size) + axs[1, 0].set_xlabel("Aggregate size", fontsize=font_size) + axs[1, 0].tick_params("x", labelrotation=60, labelsize=font_size) xticklabels = [f"{k}" for k in prob_mol_size.keys()] axs[1, 1].bar(xticklabels, prob_mol_size.values(), width=0.8) - axs[1, 1].set_ylabel("Prob. molecule in agg.") - axs[1, 1].set_xlabel("Aggregate size") - axs[1, 1].tick_params("x", labelrotation=60) + axs[1, 1].set_ylabel("Prob. molecule in agg.", fontsize=font_size) + axs[1, 1].set_xlabel("Aggregate size", fontsize=font_size) + axs[1, 1].tick_params("x", labelrotation=60, labelsize=font_size) + + axs[0, 0].tick_params(axis='both', which='major', labelsize=font_size) + axs[0, 1].tick_params(axis='both', which='major', labelsize=font_size) + axs[1, 0].tick_params(axis='both', which='major', labelsize=font_size) + axs[1, 1].tick_params(axis='both', which='major', labelsize=font_size) plt.tight_layout() plt.savefig("summary_clustering.pdf", bbox_inches="tight") @@ -351,6 +367,18 @@ def aggregates_clustering( default=(8, 6), help="two integers to define the size of the summary figure (default = 8 6)", ) + parser.add_argument( + "--font-size", + type=int, + default=12, + help="font size for the summary figure (default = 12)", + ) + parser.add_argument( + "--caption-font-size", + type=int, + default=13, + help="font size for the captions of summary figure (default = 13)", + ) parser.add_argument( "--traj-in-memory", action="store_true", @@ -358,6 +386,7 @@ def aggregates_clustering( help="load the whole trajectory in memory with MDAanalysis", ) + argcomplete.autocomplete(parser) args = parser.parse_args() if os.path.splitext(args.h5md_file)[1].lower() == ".h5": @@ -404,5 +433,7 @@ def aggregates_clustering( args.plot_dendrograms, args.traj_in_memory, args.save_solvent, + args.font_size, + args.caption_font_size, tuple(args.summary_fig_size), ) From c406ed1d3f783c92f08fca5ffdb399f39a1f2560 Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Fri, 8 Mar 2024 16:51:39 +0100 Subject: [PATCH 10/18] Print clusters found in each frame. 
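To make the new bookkeeping concrete, a minimal sketch with two hypothetical
frames of cluster labels (one label per molecule):

    import numpy as np

    clusters = [np.array([0, 0, 1, 1, 1]), np.array([0, 0, 0, 1, 2])]

    n_clusters, clusters_per_frame, all_sizes = [], [], []
    for c in clusters:
        unique_clusts, clust_counts = np.unique(c, return_counts=True)
        n_clusters.append(len(unique_clusts))             # 2, then 3
        clusters_per_frame.append(np.sort(clust_counts))  # [2 3], then [1 1 3]
        all_sizes += clust_counts.tolist()

    sizes, freq = np.unique(all_sizes, return_counts=True)
    total_clust_by_size = dict(zip(sizes, freq))          # {1: 2, 2: 1, 3: 2}
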
--- utils/aggregates.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/utils/aggregates.py b/utils/aggregates.py index 0c7964cb..c7310685 100644 --- a/utils/aggregates.py +++ b/utils/aggregates.py @@ -186,21 +186,19 @@ def aggregates_clustering( n_clusters = [] all_sizes = [] - total_clust_by_size = {} + clusters_per_frame = [] for c in clusters[0]: # get the number of clusters and sizes unique_clusts, clust_counts = np.unique(c, return_counts=True) - for size in clust_counts: - if size not in total_clust_by_size: - total_clust_by_size[size] = 1 - else: - total_clust_by_size[size] += 1 + n_clusters.append(len(unique_clusts)) + clusters_per_frame.append(np.sort(clust_counts)) all_sizes += clust_counts.tolist() # based on cluster sizes get occurence of each size sizes, freq = np.unique(all_sizes, return_counts=True) + total_clust_by_size = dict(zip(sizes, freq)) freq = freq / len(u.trajectory[skip:end:stride]) # overall average number of aggregates @@ -238,6 +236,12 @@ def aggregates_clustering( for s, p in prob_mol_size.items(): of.write(f"{s}\t{p}\n") + of.write("\nClusters sizes in each analyzed frame:\n") + for c in clusters_per_frame: + for s in c: + of.write(f"{s}\t") + of.write("\n") + # plot results _, axs = plt.subplots(2, 2, figsize=summary_fig_size) plt.rcParams.update({"font.size": font_size}) From 7b146e40cf89722618ef9ce453681f46b5ea820b Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Fri, 5 Apr 2024 16:06:54 +0200 Subject: [PATCH 11/18] Add option to compute CDF. --- utils/compute_rdf_aggregates.py | 292 ++++++++++++++++++++++++++++---- 1 file changed, 255 insertions(+), 37 deletions(-) diff --git a/utils/compute_rdf_aggregates.py b/utils/compute_rdf_aggregates.py index b07f66b6..0ff7ec41 100644 --- a/utils/compute_rdf_aggregates.py +++ b/utils/compute_rdf_aggregates.py @@ -1,8 +1,9 @@ import MDAnalysis as mda -from MDAnalysis.lib import distances +from MDAnalysis.lib import distances, mdamath, transformations from scipy.spatial.distance import squareform import numpy as np import matplotlib.pyplot as plt +from matplotlib.image import NonUniformImage import os import sys from glob import glob @@ -55,18 +56,29 @@ def compute_rdfs( name_selections, center, nbins, + nbinsl, rmax, + lmax, + natoms_normalization, masses, skip_rdfs, compute_i_rg, compute_pddf, + compute_cdfs, save_centered, save_agg_only, + names_prefix, + start_colorbar, + width_colorbar, + fig_size=(10, 8), + fig_size_cdf=(12,12), ): # initialize rdfs = {} + cdfs = {} for i in range(len(selections)): rdfs[i] = np.zeros(nbins) + cdfs[i] = np.zeros((nbinsl, nbins)) if compute_i_rg: I1 = [] I2 = [] @@ -78,10 +90,17 @@ def compute_rdfs( # read the form factors / scattering lengths form_factors = parse_parametersfile(compute_pddf) + if not names_prefix: + names_prefix = "" + nsnaps = 0 for snapshot in tqdm(glob(os.path.join(pdbdir, "*.pdb"))): u = mda.Universe(snapshot) + if not natoms_normalization: + natoms_normalization = len(u.atoms) + + # based on topology and residues determine size of aggregates resids = {} for residue in u.residues: @@ -127,7 +146,7 @@ def compute_rdfs( # since some methods are not PBC aware, we center the # aggregate in the box so it does not split in the PBCs - if save_agg_only or save_centered or compute_i_rg: + if save_agg_only or save_centered or compute_i_rg or compute_cdfs: box_center = box_vectors[:3] / 2.0 u.atoms.translate(box_center - cog) u.atoms.wrap(compound="atoms") @@ -155,8 +174,8 @@ def compute_rdfs( os.path.join(dirname, 
f"centered_{os.path.basename(snapshot)}") ) - # compute the principal moment of inertia and Rg - if compute_i_rg: + # set the masses + if masses: # set the masses agg_sel = u.select_atoms(f"resid {resid}") @@ -172,6 +191,10 @@ def compute_rdfs( agg_sel.masses = mass_array total_mass = np.sum(mass_array) + # compute the principal moment of inertia and Rg + if compute_i_rg: + agg_sel = u.select_atoms(f"resid {resid}") + # get moment of inertia and principal axis I = agg_sel.moment_of_inertia() UT = agg_sel.principal_axes() # this is already transposed @@ -212,27 +235,66 @@ def compute_rdfs( # Compute the histogram pddf_i, pddf_edges = np.histogram(cond_distmat, bins=nbins, range=(0, rmax), weights=weights, density=False) - # deltaR = rmax / nbins - # pddf_i = np.zeros((nbins,), dtype=np.float64) - # pddf_dists = np.linspace(deltaR/2.0, rmax - deltaR/2.0, nbins) - # for i in range(n): - # for j in range(i + 1, n): - # if sqdistmat[i][j] <= rmax: - # bin_distance = np.floor(sqdistmat[i][j]/deltaR) - - # pddf_i[int(bin_distance)] += form_factors[agg_sel[i].residue.resname][agg_sel[i].name][0] * form_factors[agg_sel[j].residue.resname][agg_sel[j].name][0] - - - # pddf_i, pddf_edges = np.histogram( - # cond_distmat, - # bins=nbins, - # range=(0, rmax), - # density=False, + pddf += pddf_i + + # compute the cylindrical distribution function + if compute_cdfs: + # Based on https://github.com/RichardMandle/cylindr + # Cite: 10.1371/journal.pone.0279679 + + # first align the principal axis of the aggregate with the z-axis + agg_sel = u.select_atoms(f"resid {resid}") + + # get principal axis + pa = agg_sel.principal_axes()[2] + + # get the angle between the principal axis and the z-axis and rotate universe + angle = np.degrees(mdamath.angle(pa, [0,0,1])) + ax = transformations.rotaxis(pa, [0,0,1]) + u.atoms.rotateby(angle, ax, point=cog) + # dirname = f"./rotated_cdf_{agg_size}" + + # if not os.path.exists(dirname): + # os.mkdir(dirname) + + # u.atoms.write( + # os.path.join(dirname, f"rotated_{os.path.basename(snapshot)}") # ) - pddf += pddf_i + # create selections for which CDFs will be computed + for i, sel_string in enumerate(selections): + if sel_string.strip().lower() in ["name w", "type w", "name na", "type na", "name cl", "type cl"]: + at_sel = u.select_atoms(sel_string) + else: + at_sel = u.select_atoms(f"resid {resid} and " + sel_string) + + + # Compute the distances between the target atoms and the center + distances_matrix = at_sel.positions - cog + + # create array with z and r distances + zdist = distances_matrix[:, 2] + rdist = np.sqrt(distances_matrix[:, 0] ** 2 + distances_matrix[:, 1] ** 2) + + # Exclude positions where z is greater than lmax or smaller than -lmax + valid_indices = np.where((zdist >= -lmax) & (zdist <= lmax)) + zdist = zdist[valid_indices] + rdist = rdist[valid_indices] + + # Compute the CDF + cdf, yedges, xedges = np.histogram2d( + zdist, rdist, bins=[nbinsl, nbins], range=[[-lmax, lmax], [0, rmax]], density=False + ) + cdfs[i] += cdf * np.prod(box_vectors[:3]) / natoms_normalization + + # Compute the RDF using numpy histogram + rdf, bin_edges = np.histogram( + rdist, bins=nbins, range=(0, rmax), density=False + ) + rdfs[i] += rdf * np.prod(box_vectors[:3]) / natoms_normalization - if not skip_rdfs: + + elif not skip_rdfs: # create selections for which RDFs will be computed for i, sel_string in enumerate(selections): if sel_string.strip().lower() in ["name w", "type w", "name na", "type na", "name cl", "type cl"]: @@ -253,19 +315,108 @@ def compute_rdfs( 
flattened_distances, bins=nbins, range=(0, rmax), density=False ) - rdfs[i] += rdf * np.prod(box_vectors[:3]) / len(u.atoms) + rdfs[i] += rdf * np.prod(box_vectors[:3]) / natoms_normalization + - plt.rcParams["figure.figsize"] = (10, 8) + plt.rcParams["figure.figsize"] = fig_size plt.rcParams["font.size"] = 22 + if compute_cdfs: + # compute the volume of each shell + # shell_volumes = np.zeros((np.size(yedges) - 1, np.size(xedges) - 1)) + # for n in range(0, np.size(yedges) -1): + # for m in range(0, np.size(xedges) - 1): + # shell_volumes[n, m] = ((2 * np.pi * (xedges[m + 1]) ** 2) - (2 * np.pi * (xedges[m - 1]) ** 2)) * np.abs(yedges[2] - yedges[1]) + shell_volumes = np.pi * (xedges[1:] ** 2 - xedges[:-1] ** 2) * np.abs(yedges[1] - yedges[0]) + xedges /= 10.0 + yedges /= 10.0 + # prepare to plot + xcenters = (xedges[:-1] + xedges[1:]) / 2.0 + ycenters = (yedges[:-1] + yedges[1:]) / 2.0 + np.savetxt(names_prefix + f"{agg_size}_binsR.txt", xcenters) + np.savetxt(names_prefix + f"{agg_size}_binsL.txt", ycenters) + + # normalize CDFs and find the lower and largest values + minval = np.inf + maxval = -np.inf + for i in range(len(selections)): + cdfs[i] = cdfs[i] / (shell_volumes * nsnaps) + if minval > np.min(cdfs[i]): + minval = np.min(cdfs[i]) + if maxval < np.max(cdfs[i]): + maxval = np.max(cdfs[i]) + + # normalize the CDF by dividing by the shell volume and number of snapshots and plot + fig, ax = plt.subplots(nrows=2, ncols=int(np.ceil(len(selections)/2)), figsize=fig_size_cdf) + fig.tight_layout(w_pad=0.4, h_pad=1.8) + for a in ax.flatten(): + a.set_aspect(5/10) + if int(np.ceil(len(selections)/2)) == 1: + for i, sel in enumerate(selections): + ax[i].set_title(name_selections[i]) + ax[i].set_xlim(xedges[[0, -1]]) + ax[i].set_ylim(yedges[[0, -1]]) + ax[i].set_xticks(np.linspace(xedges[0], xedges[-1], 3)) + ax[i].set_yticks(np.linspace(yedges[0], yedges[-1], 7)) + np.savetxt(names_prefix + f"CDFs_{agg_size}_" + sel.replace(" ", "_") + ".txt", cdfs[i]) + im = ax[i].imshow(cdfs[i], extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], origin='lower', interpolation='bilinear', vmin=minval, vmax=maxval, cmap=plt.cm.RdBu) + ax[i].grid() + ax[i].set_xlabel(r"r (nm)") + ax[i].set_ylabel(r"h (nm)") + else: + # make the last plot blank if the number of selections is odd + if len(selections) % 2 != 0: + ax[-1][-1].axis('off') + for i, sel in enumerate(selections): + idx1, idx2 = i % 2, int(i / 2) + ax[idx1][idx2].set_title(name_selections[i]) + ax[idx1][idx2].set_xlim(xedges[[0, -1]]) + ax[idx1][idx2].set_ylim(yedges[[0, -1]]) + ax[idx1][idx2].set_xticks(np.linspace(xedges[0], xedges[-1], 3)) + ax[idx1][idx2].set_yticks(np.linspace(yedges[0], yedges[-1], 7)) + np.savetxt(names_prefix + f"CDFs_{agg_size}_" + sel.replace(" ", "_") + ".txt", cdfs[i]) + im = ax[idx1][idx2].imshow(cdfs[i], extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], origin='lower', interpolation='bilinear', vmin=minval, vmax=maxval, cmap=plt.cm.RdBu) + ax[idx1][idx2].grid() + ax[idx1][idx2].set_xlabel(r"r (nm)") + ax[idx1][idx2].set_ylabel(r"h (nm)") + # Add color bar + cax = fig.add_axes([start_colorbar, 0.15, width_colorbar, 0.7]) + cbar = fig.colorbar(im, cax=cax) + cbar.set_label('CDF') + fig.subplots_adjust(hspace=0.35, right=start_colorbar) + plt.savefig(names_prefix + f"CDFs_{agg_size}.pdf") + plt.show() + plt.close('all') + + # radial + # shell_volumes = np.pi * (bin_edges[1:] ** 2 - bin_edges[:-1] ** 2) * box_vectors[2] + shell_volumes = np.pi * (bin_edges[1:] ** 2 - bin_edges[:-1] ** 2) * 2.0 * lmax + # Prepare to 
plot + mid_bin = (bin_edges[:-1] + bin_edges[1:]) / 2.0 + np.savetxt(names_prefix + f"gperp_{agg_size}_bins.txt", mid_bin) + + # Normalize the RDF by dividing by the shell volume and number of snapshots and plot + for i, sel in enumerate(selections): + rdfs[i] = rdfs[i] / (shell_volumes * nsnaps) + + plt.plot(mid_bin / 10.0, rdfs[i], linewidth=3, label=name_selections[i]) + + np.savetxt(names_prefix + f"gperp_{agg_size}_" + sel.replace(" ", "_") + ".txt", rdfs[i]) + + plt.legend(frameon=False) + plt.xlabel(r"r (nm)") + plt.ylabel(r"g$_\perp$(r)") + plt.savefig(names_prefix + f"radial_RDFs_{agg_size}.pdf", bbox_inches="tight") + plt.show() + # check if rdfs are skipped - if not skip_rdfs: + elif not skip_rdfs: # Compute the average volume of each shell shell_volumes = (4.0 / 3.0) * np.pi * (bin_edges[1:] ** 3 - bin_edges[:-1] ** 3) # Prepare to plot mid_bin = (bin_edges[:-1] + bin_edges[1:]) / 2.0 - np.savetxt(f"{agg_size}_bins.txt", mid_bin) + np.savetxt(names_prefix + f"{agg_size}_bins.txt", mid_bin) # Normalize the RDF by dividing by the shell volume and number of snapshots and plot for i, sel in enumerate(selections): @@ -273,12 +424,12 @@ def compute_rdfs( plt.plot(mid_bin / 10.0, rdfs[i], linewidth=3, label=name_selections[i]) - np.savetxt(f"{agg_size}_" + sel.replace(" ", "_") + ".txt", rdfs[i]) + np.savetxt(names_prefix + f"{agg_size}_" + sel.replace(" ", "_") + ".txt", rdfs[i]) plt.legend(frameon=False) plt.xlabel(r"r (nm)") plt.ylabel(r"g(r)") - plt.savefig(f"RDFs_{agg_size}.pdf", bbox_inches="tight") + plt.savefig(names_prefix + f"RDFs_{agg_size}.pdf", bbox_inches="tight") plt.show() # now save information regarding the Is and Rg @@ -309,7 +460,7 @@ def compute_rdfs( plt.xlabel(r"Config") plt.ylabel(r"MOI (Da $\AA^2$)") - plt.savefig(f"Is_{agg_size}.pdf", bbox_inches="tight") + plt.savefig(names_prefix + f"Is_{agg_size}.pdf", bbox_inches="tight") plt.show() # clear plot and plot Rg @@ -319,7 +470,7 @@ def compute_rdfs( plt.xlabel(r"Config") plt.ylabel(r"R$_g$ ($\AA$)") - plt.savefig(f"Rg_{agg_size}.pdf", bbox_inches="tight") + plt.savefig(names_prefix + f"Rg_{agg_size}.pdf", bbox_inches="tight") plt.show() # Now print average values and alpha @@ -339,14 +490,14 @@ def compute_rdfs( # pddf /= np.max(pddf) - np.savetxt(f"{agg_size}_" + "PDDF.txt", pddf) - np.savetxt(f"{agg_size}_" + "PDDF_bins.txt", mid_bin_pddf) + np.savetxt(names_prefix + f"{agg_size}_" + "PDDF.txt", pddf) + np.savetxt(names_prefix + f"{agg_size}_" + "PDDF_bins.txt", mid_bin_pddf) plt.plot(mid_bin_pddf / 10.0, pddf, linewidth=3) plt.xlabel(r"r (nm)") plt.ylabel(r"PDDF") - plt.savefig(f"PDDF_{agg_size}.pdf", bbox_inches="tight") + plt.savefig(names_prefix + f"PDDF_{agg_size}.pdf", bbox_inches="tight") plt.show() @@ -373,14 +524,26 @@ def compute_rdfs( parser.add_argument( "--nbins", type=int, - default=100, - help="number of bins used in the histogram (default = 100)", + default=50, + help="number of bins used in the histogram (default = 50)", + ) + parser.add_argument( + "--nbinsl", + type=int, + default=300, + help="number of bins used in the histogram for CDF in z direction (default = 300)", ) parser.add_argument( "--rmax", type=float, default=10.0, - help="maximum distance of RDF (defualt = 10.0 A)", + help="maximum distance of RDF (default = 10.0 A)", + ) + parser.add_argument( + "--lmax", + type=float, + default=30.0, + help="maximum distance for CDF z direction (default = 30.0 A)", ) parser.add_argument( "--center", @@ -412,6 +575,12 @@ def compute_rdfs( default=False, help="skip RDF computations", ) + 
parser.add_argument( + "--compute-cdfs", + action="store_true", + default=False, + help="instead of computing RDFs, compute CDFs (requires --masses)", + ) parser.add_argument( "--principal-moments-rg", action="store_true", @@ -422,13 +591,53 @@ def compute_rdfs( "--compute-pddf", type=str, default=None, - help="compute the pair distance distribution function (PDDF) - needs a form factor .toml file", + help="compute the pair distance distribution function (PDDF) - needs parameters file just like for PLUMED", + ) + parser.add_argument( + "--natoms-normalization", + type=int, + default=None, + help="number of atoms to normalize the RDFs/CDFs", + ) + parser.add_argument( + "--names-prefix", + type=str, + default=None, + help="prefix for the names of the saved files", + ) + parser.add_argument( + "--fig-size", + type=int, + nargs=2, + default=(10, 8), + help="two integers to define the size of the RDF figure (default = 10 8)", + ) + parser.add_argument( + "--fig-size-cdf", + type=int, + nargs=2, + default=(12, 12), + help="two integers to define the size of the CDF figure (default = 12 12)", + ) + parser.add_argument( + "--start-colorbar", + type=float, + default=0.85, + help="value for which after that the colorbar will start (default = 0.85) in percentage of width", + ) + parser.add_argument( + "--width-colorbar", + type=float, + default=0.02, + help="width of the colorbar (default = 0.02)", ) args = parser.parse_args() if args.principal_moments_rg and not args.masses: raise AssertionError("--principal-moments-rg requires passing the masses") + if args.compute_cdfs and not args.masses: + raise AssertionError("--compute-cdfs requires passing the masses") if args.name_selections and (len(args.selections) != len(args.name_selections)): raise AssertionError("the number of selections must be equal to the number of names") @@ -449,11 +658,20 @@ def compute_rdfs( args.name_selections, args.center, args.nbins, + args.nbinsl, args.rmax, + args.lmax, + args.natoms_normalization, parse_masses(args.masses), args.do_not_compute_rdfs, args.principal_moments_rg, args.compute_pddf, + args.compute_cdfs, args.save_centered_aggregate, args.save_aggregate_only, + args.names_prefix, + args.start_colorbar, + args.width_colorbar, + tuple(args.fig_size), + tuple(args.fig_size_cdf), ) From 3eb2f29da35e231f70b4026bde7dfc0992d96c6c Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Tue, 21 May 2024 11:26:41 +0200 Subject: [PATCH 12/18] Adds separation in spherical, prolate and oblate aggregates. 
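The shape detection added below, pulled out as a standalone sketch (the
helper name and example moments are illustrative, not part of the script;
the 0.12 cutoff is the value hardcoded in this patch):

    import numpy as np

    def classify_aggregate(I1, I2, I3, threshold=0.12):
        # "eccentricity" as defined in this patch: 1 - Imin / Imean
        e = 1.0 - np.min([I1, I2, I3]) / np.mean([I1, I2, I3])
        if e < threshold:
            return "sphere", e
        # I3 is the smallest moment (longest axis): if the two largest
        # moments differ more than the two smallest, the aggregate is
        # disk-like (oblate), otherwise rod-like (prolate)
        if np.abs(I1 - I2) > np.abs(I2 - I3):
            return "oblate", e
        return "prolate", e

    print(classify_aggregate(10.0, 9.8, 4.0))  # ('prolate', ~0.496)
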
--- utils/compute_rdf_aggregates.py | 242 +++++++++++++++++++++----------- 1 file changed, 161 insertions(+), 81 deletions(-) diff --git a/utils/compute_rdf_aggregates.py b/utils/compute_rdf_aggregates.py index 0ff7ec41..45d4424e 100644 --- a/utils/compute_rdf_aggregates.py +++ b/utils/compute_rdf_aggregates.py @@ -76,9 +76,21 @@ def compute_rdfs( # initialize rdfs = {} cdfs = {} - for i in range(len(selections)): - rdfs[i] = np.zeros(nbins) - cdfs[i] = np.zeros((nbinsl, nbins)) + if compute_cdfs: + type_map = {"sphere": 0, "oblate": 1, "prolate": 2} + rev_type_map = {v: k for k, v in type_map.items()} + agg_types = [] + eccentricities = [] + + for k, v in type_map.items(): + cdfs[v] = {} + rdfs[v] = {} + for i in range(len(selections)): + cdfs[v][i] = np.zeros((nbinsl, nbins)) + rdfs[v][i] = np.zeros(nbins) + else: + for i in range(len(selections)): + rdfs[i] = np.zeros(nbins) if compute_i_rg: I1 = [] I2 = [] @@ -94,13 +106,13 @@ def compute_rdfs( names_prefix = "" nsnaps = 0 + snapshots = [] for snapshot in tqdm(glob(os.path.join(pdbdir, "*.pdb"))): u = mda.Universe(snapshot) if not natoms_normalization: natoms_normalization = len(u.atoms) - # based on topology and residues determine size of aggregates resids = {} for residue in u.residues: @@ -122,6 +134,8 @@ def compute_rdfs( if len(agg_resids) == 0: continue + snapshots.append(os.path.basename(snapshot)) + # for each aggregate of given size, accumulate the RDFs for resid in agg_resids: nsnaps += 1 @@ -245,10 +259,47 @@ def compute_rdfs( # first align the principal axis of the aggregate with the z-axis agg_sel = u.select_atoms(f"resid {resid}") - # get principal axis - pa = agg_sel.principal_axes()[2] + # get moment of inertia and principal axis + I = agg_sel.moment_of_inertia() + UT = agg_sel.principal_axes() # this is already transposed + + # diagonalizes I + Lambda = UT.dot(I.dot(UT.T)) + + I1 = Lambda[0][0] + I2 = Lambda[1][1] + I3 = Lambda[2][2] + + # TODO: based on the three principal moments of inertia we should find + # if the aggregate is rod-like, disk-like or sphere-like + # and then compute the CDFs accordingly (separating the cases) + # this can be done by checking which axis are similar in length and calling them a = b != c + # if c/a is close to 1, then it is a sphere + # if c/a is greater than 1, then it is prolate + # if c/a is smaller than 1, then it is oblate + # then we can also output the amount of each type of aggregate + type_agg = None + # compute eccentricity + e = 1.0 - np.min([I1, I2, I3]) / np.mean([I1, I2, I3]) + eccentricities.append(e) + if e < 0.12: + type_agg = type_map["sphere"] + else: + # I3 is the lowest value (biggest axis) + if np.abs(I1 - I2) > np.abs(I2 - I3): + type_agg = type_map["oblate"] + else: + type_agg = type_map["prolate"] + agg_types.append(type_agg) + + # gets symmetry axis + if type_agg == type_map["oblate"]: + pa = UT[0] + else: + pa = UT[2] # get the angle between the principal axis and the z-axis and rotate universe + # this does not take into consideration prolate or oblate micelles angle = np.degrees(mdamath.angle(pa, [0,0,1])) ax = transformations.rotaxis(pa, [0,0,1]) u.atoms.rotateby(angle, ax, point=cog) @@ -268,7 +319,6 @@ def compute_rdfs( else: at_sel = u.select_atoms(f"resid {resid} and " + sel_string) - # Compute the distances between the target atoms and the center distances_matrix = at_sel.positions - cog @@ -285,13 +335,13 @@ def compute_rdfs( cdf, yedges, xedges = np.histogram2d( zdist, rdist, bins=[nbinsl, nbins], range=[[-lmax, lmax], [0, rmax]], density=False ) - 
cdfs[i] += cdf * np.prod(box_vectors[:3]) / natoms_normalization + cdfs[type_agg][i] += cdf * np.prod(box_vectors[:3]) / natoms_normalization # Compute the RDF using numpy histogram rdf, bin_edges = np.histogram( rdist, bins=nbins, range=(0, rmax), density=False ) - rdfs[i] += rdf * np.prod(box_vectors[:3]) / natoms_normalization + rdfs[type_agg][i] += rdf * np.prod(box_vectors[:3]) / natoms_normalization elif not skip_rdfs: @@ -322,12 +372,17 @@ def compute_rdfs( plt.rcParams["font.size"] = 22 if compute_cdfs: - # compute the volume of each shell - # shell_volumes = np.zeros((np.size(yedges) - 1, np.size(xedges) - 1)) - # for n in range(0, np.size(yedges) -1): - # for m in range(0, np.size(xedges) - 1): - # shell_volumes[n, m] = ((2 * np.pi * (xedges[m + 1]) ** 2) - (2 * np.pi * (xedges[m - 1]) ** 2)) * np.abs(yedges[2] - yedges[1]) - shell_volumes = np.pi * (xedges[1:] ** 2 - xedges[:-1] ** 2) * np.abs(yedges[1] - yedges[0]) + # plot evolution of the aggregate types + fig, ax = plt.subplots() + ax.plot(np.arange(len(agg_types)) , agg_types) + ax.set_yticks([0, 1, 2]) + ax.set_yticklabels(["sphere", "oblate", "prolate"]) + plt.savefig(names_prefix + f"agg_type_evolution_{agg_size}.pdf") + plt.show() + plt.close('all') + + # CDF + shell_volumes_cdf = np.pi * (xedges[1:] ** 2 - xedges[:-1] ** 2) * np.abs(yedges[1] - yedges[0]) xedges /= 10.0 yedges /= 10.0 # prepare to plot @@ -336,78 +391,103 @@ def compute_rdfs( np.savetxt(names_prefix + f"{agg_size}_binsR.txt", xcenters) np.savetxt(names_prefix + f"{agg_size}_binsL.txt", ycenters) - # normalize CDFs and find the lower and largest values - minval = np.inf - maxval = -np.inf - for i in range(len(selections)): - cdfs[i] = cdfs[i] / (shell_volumes * nsnaps) - if minval > np.min(cdfs[i]): - minval = np.min(cdfs[i]) - if maxval < np.max(cdfs[i]): - maxval = np.max(cdfs[i]) - - # normalize the CDF by dividing by the shell volume and number of snapshots and plot - fig, ax = plt.subplots(nrows=2, ncols=int(np.ceil(len(selections)/2)), figsize=fig_size_cdf) - fig.tight_layout(w_pad=0.4, h_pad=1.8) - for a in ax.flatten(): - a.set_aspect(5/10) - if int(np.ceil(len(selections)/2)) == 1: - for i, sel in enumerate(selections): - ax[i].set_title(name_selections[i]) - ax[i].set_xlim(xedges[[0, -1]]) - ax[i].set_ylim(yedges[[0, -1]]) - ax[i].set_xticks(np.linspace(xedges[0], xedges[-1], 3)) - ax[i].set_yticks(np.linspace(yedges[0], yedges[-1], 7)) - np.savetxt(names_prefix + f"CDFs_{agg_size}_" + sel.replace(" ", "_") + ".txt", cdfs[i]) - im = ax[i].imshow(cdfs[i], extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], origin='lower', interpolation='bilinear', vmin=minval, vmax=maxval, cmap=plt.cm.RdBu) - ax[i].grid() - ax[i].set_xlabel(r"r (nm)") - ax[i].set_ylabel(r"h (nm)") - else: - # make the last plot blank if the number of selections is odd - if len(selections) % 2 != 0: - ax[-1][-1].axis('off') - for i, sel in enumerate(selections): - idx1, idx2 = i % 2, int(i / 2) - ax[idx1][idx2].set_title(name_selections[i]) - ax[idx1][idx2].set_xlim(xedges[[0, -1]]) - ax[idx1][idx2].set_ylim(yedges[[0, -1]]) - ax[idx1][idx2].set_xticks(np.linspace(xedges[0], xedges[-1], 3)) - ax[idx1][idx2].set_yticks(np.linspace(yedges[0], yedges[-1], 7)) - np.savetxt(names_prefix + f"CDFs_{agg_size}_" + sel.replace(" ", "_") + ".txt", cdfs[i]) - im = ax[idx1][idx2].imshow(cdfs[i], extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], origin='lower', interpolation='bilinear', vmin=minval, vmax=maxval, cmap=plt.cm.RdBu) - ax[idx1][idx2].grid() - ax[idx1][idx2].set_xlabel(r"r 
(nm)") - ax[idx1][idx2].set_ylabel(r"h (nm)") - # Add color bar - cax = fig.add_axes([start_colorbar, 0.15, width_colorbar, 0.7]) - cbar = fig.colorbar(im, cax=cax) - cbar.set_label('CDF') - fig.subplots_adjust(hspace=0.35, right=start_colorbar) - plt.savefig(names_prefix + f"CDFs_{agg_size}.pdf") - plt.show() - plt.close('all') - - # radial - # shell_volumes = np.pi * (bin_edges[1:] ** 2 - bin_edges[:-1] ** 2) * box_vectors[2] - shell_volumes = np.pi * (bin_edges[1:] ** 2 - bin_edges[:-1] ** 2) * 2.0 * lmax + # RDF + shell_volumes_rdf = np.pi * (bin_edges[1:] ** 2 - bin_edges[:-1] ** 2) * 2.0 * lmax # Prepare to plot mid_bin = (bin_edges[:-1] + bin_edges[1:]) / 2.0 np.savetxt(names_prefix + f"gperp_{agg_size}_bins.txt", mid_bin) - # Normalize the RDF by dividing by the shell volume and number of snapshots and plot - for i, sel in enumerate(selections): - rdfs[i] = rdfs[i] / (shell_volumes * nsnaps) + # count agg by type and write to file + count_agg_types = {} + for i in type_map.values(): + count_agg_types[i] = np.count_nonzero(np.array(agg_types) == i) + + # write results of agg type detection to file + with open("summary_compute_rdf_aggregates.txt", "a") as f: + f.write("\nNumber of aggregates classified by type:\n") + for k, v in count_agg_types.items(): + f.write(f"{rev_type_map[k]}: {v} => {100.0 * v/nsnaps:.4f} %. = {np.mean(np.array(eccentricities)[np.array(agg_types) == k]):.4f}\n") + + f.write("\nSnapshot number by aggregate type:\n") + snap_map = np.array(snapshots) + for k, v in type_map.items(): + f.write(f"{k}: ") + for snap in snap_map[np.array(agg_types) == v]: + f.write(f"{snap}, ") + f.write("\n") + + # for each type of aggregate, normalize the CDFs/RDFs and plot + for agg_type in type_map.values(): + # skip if there are no CDFs to plot + if count_agg_types[agg_type] == 0: + continue + + # normalize CDFs and find the lower and largest values + minval = np.inf + maxval = -np.inf + for i in range(len(selections)): + cdfs[agg_type][i] = cdfs[agg_type][i] / (shell_volumes_cdf * count_agg_types[agg_type]) + if minval > np.min(cdfs[agg_type][i]): + minval = np.min(cdfs[agg_type][i]) + if maxval < np.max(cdfs[agg_type][i]): + maxval = np.max(cdfs[agg_type][i]) + + # normalize the CDF by dividing by the shell volume and number of snapshots and plot + fig, ax = plt.subplots(nrows=2, ncols=int(np.ceil(len(selections)/2)), figsize=fig_size_cdf) + fig.tight_layout(w_pad=0.4, h_pad=1.8) + for a in ax.flatten(): + a.set_aspect(5/10) + if int(np.ceil(len(selections)/2)) == 1: + for i, sel in enumerate(selections): + ax[i].set_title(name_selections[i]) + ax[i].set_xlim(xedges[[0, -1]]) + ax[i].set_ylim(yedges[[0, -1]]) + ax[i].set_xticks(np.linspace(xedges[0], xedges[-1], 3)) + ax[i].set_yticks(np.linspace(yedges[0], yedges[-1], 7)) + np.savetxt(names_prefix + f"CDFs_{agg_size}_" + sel.replace(" ", "_") + "_" + rev_type_map[agg_type] + ".txt", cdfs[agg_type][i]) + im = ax[i].imshow(cdfs[agg_type][i], extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], origin='lower', interpolation='bilinear', vmin=minval, vmax=maxval, cmap=plt.cm.RdBu) + ax[i].grid() + ax[i].set_xlabel(r"r (nm)") + ax[i].set_ylabel(r"h (nm)") + else: + # make the last plot blank if the number of selections is odd + if len(selections) % 2 != 0: + ax[-1][-1].axis('off') + for i, sel in enumerate(selections): + idx1, idx2 = i % 2, int(i / 2) + ax[idx1][idx2].set_title(name_selections[i]) + ax[idx1][idx2].set_xlim(xedges[[0, -1]]) + ax[idx1][idx2].set_ylim(yedges[[0, -1]]) + 
ax[idx1][idx2].set_xticks(np.linspace(xedges[0], xedges[-1], 3)) + ax[idx1][idx2].set_yticks(np.linspace(yedges[0], yedges[-1], 7)) + np.savetxt(names_prefix + f"CDFs_{agg_size}_" + sel.replace(" ", "_") + "_" + rev_type_map[agg_type] + ".txt", cdfs[agg_type][i]) + im = ax[idx1][idx2].imshow(cdfs[agg_type][i], extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], origin='lower', interpolation='bilinear', vmin=minval, vmax=maxval, cmap=plt.cm.RdBu) + ax[idx1][idx2].grid() + ax[idx1][idx2].set_xlabel(r"r (nm)") + ax[idx1][idx2].set_ylabel(r"h (nm)") + # Add color bar + cax = fig.add_axes([start_colorbar, 0.15, width_colorbar, 0.7]) + cbar = fig.colorbar(im, cax=cax) + cbar.set_label('CDF') + fig.subplots_adjust(hspace=0.35, right=start_colorbar) + plt.savefig(names_prefix + f"CDFs_{agg_size}_{rev_type_map[agg_type]}.pdf") + plt.show() + plt.close('all') + + # radial + # Normalize the RDF by dividing by the shell volume and number of snapshots and plot + for i, sel in enumerate(selections): + rdfs[agg_type][i] = rdfs[agg_type][i] / (shell_volumes_rdf * count_agg_types[agg_type]) - plt.plot(mid_bin / 10.0, rdfs[i], linewidth=3, label=name_selections[i]) + plt.plot(mid_bin / 10.0, rdfs[agg_type][i], linewidth=3, label=name_selections[i]) - np.savetxt(names_prefix + f"gperp_{agg_size}_" + sel.replace(" ", "_") + ".txt", rdfs[i]) + np.savetxt(names_prefix + f"gperp_{agg_size}_" + sel.replace(" ", "_") + ".txt", rdfs[agg_type][i]) - plt.legend(frameon=False) - plt.xlabel(r"r (nm)") - plt.ylabel(r"g$_\perp$(r)") - plt.savefig(names_prefix + f"radial_RDFs_{agg_size}.pdf", bbox_inches="tight") - plt.show() + plt.legend(frameon=False) + plt.xlabel(r"r (nm)") + plt.ylabel(r"g$_\perp$(r)") + plt.savefig(names_prefix + f"radial_RDFs_{agg_size}_{rev_type_map[agg_type]}.pdf", bbox_inches="tight") + plt.show() # check if rdfs are skipped elif not skip_rdfs: @@ -482,7 +562,7 @@ def compute_rdfs( ) f.write("a = {} A, b = {} A, c = {} A\n\n".format(a, b, c)) - f.write("alpha = {}\n\n".format((2 * Ia - Ib - Ic) / (Ia + Ib + Ic))) + f.write("e = {}\n\n".format(1.0 - np.min([Ia, Ib, Ic]) / np.mean([Ia, Ib, Ic]))) f.write("Rg = {}±{} A\n".format(Rgm, np.std(Rg))) if compute_pddf: From 886d0f8f21ba828440cc8f6d0da4e81287758876 Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Tue, 21 May 2024 11:54:54 +0200 Subject: [PATCH 13/18] Cleanup and add std dev for eccentricity. 
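The per-type average now comes with a spread; the reduction is a plain
boolean mask over the accumulated arrays. A small sketch with made-up data:

    import numpy as np

    agg_types = np.array([0, 2, 0, 1, 2])  # 0=sphere, 1=oblate, 2=prolate
    eccentricities = np.array([0.05, 0.40, 0.10, 0.22, 0.35])

    for k, name in {0: "sphere", 1: "oblate", 2: "prolate"}.items():
        e_type = eccentricities[agg_types == k]
        if e_type.size:
            print(f"{name}: {np.mean(e_type):.4f} ± {np.std(e_type):.4f}")
    # sphere: 0.0750 ± 0.0250
    # oblate: 0.2200 ± 0.0000
    # prolate: 0.3750 ± 0.0250
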
---
 utils/compute_rdf_aggregates.py | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/utils/compute_rdf_aggregates.py b/utils/compute_rdf_aggregates.py
index 45d4424e..a43296d5 100644
--- a/utils/compute_rdf_aggregates.py
+++ b/utils/compute_rdf_aggregates.py
@@ -270,14 +270,7 @@ def compute_rdfs(
             I2 = Lambda[1][1]
             I3 = Lambda[2][2]
 
-            # TODO: based on the three principal moments of inertia we should find
-            # if the aggregate is rod-like, disk-like or sphere-like
-            # and then compute the CDFs accordingly (separating the cases)
-            # this can be done by checking which axis are similar in length and calling them a = b != c
-            # if c/a is close to 1, then it is a sphere
-            # if c/a is greater than 1, then it is prolate
-            # if c/a is smaller than 1, then it is oblate
-            # then we can also output the amount of each type of aggregate
+            # detect the aggregate type
             type_agg = None
             # compute eccentricity
             e = 1.0 - np.min([I1, I2, I3]) / np.mean([I1, I2, I3])
@@ -303,14 +296,6 @@ def compute_rdfs(
             angle = np.degrees(mdamath.angle(pa, [0,0,1]))
             ax = transformations.rotaxis(pa, [0,0,1])
             u.atoms.rotateby(angle, ax, point=cog)
-            # dirname = f"./rotated_cdf_{agg_size}"
-
-            # if not os.path.exists(dirname):
-            #     os.mkdir(dirname)
-
-            # u.atoms.write(
-            #     os.path.join(dirname, f"rotated_{os.path.basename(snapshot)}")
-            # )
 
             # create selections for which CDFs will be computed
             for i, sel_string in enumerate(selections):
@@ -406,7 +391,8 @@ def compute_rdfs(
         with open("summary_compute_rdf_aggregates.txt", "a") as f:
             f.write("\nNumber of aggregates classified by type:\n")
             for k, v in count_agg_types.items():
-                f.write(f"{rev_type_map[k]}: {v} => {100.0 * v/nsnaps:.4f} %. = {np.mean(np.array(eccentricities)[np.array(agg_types) == k]):.4f}\n")
+                e_type = np.array(eccentricities)[np.array(agg_types) == k]
+                f.write(f"{rev_type_map[k]}: {v} => {100.0 * v/nsnaps:.4f} %. <e> = {np.mean(e_type):.4f} ± {np.std(e_type):.4f}\n")
 
         f.write("\nSnapshot number by aggregate type:\n")
         snap_map = np.array(snapshots)

From 2298a7f067d104bbb28caf49a10df2a900c94531 Mon Sep 17 00:00:00 2001
From: Henrique Musseli Cezar
Date: Tue, 21 May 2024 12:01:59 +0200
Subject: [PATCH 14/18] Add agg number to summary file name.

---
 utils/compute_rdf_aggregates.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/utils/compute_rdf_aggregates.py b/utils/compute_rdf_aggregates.py
index a43296d5..482ebeb1 100644
--- a/utils/compute_rdf_aggregates.py
+++ b/utils/compute_rdf_aggregates.py
@@ -388,7 +388,7 @@ def compute_rdfs(
         count_agg_types[i] = np.count_nonzero(np.array(agg_types) == i)
 
     # write results of agg type detection to file
-    with open("summary_compute_rdf_aggregates.txt", "a") as f:
+    with open(f"summary_compute_rdf_aggregates_{agg_size}.txt", "a") as f:
         f.write("\nNumber of aggregates classified by type:\n")
         for k, v in count_agg_types.items():
             e_type = np.array(eccentricities)[np.array(agg_types) == k]
@@ -713,7 +713,7 @@ def compute_rdfs(
     topol = process_topology(args.topol)
 
     # write options to file
-    with open("summary_compute_rdf_aggregates.txt", "w") as f:
+    with open(f"summary_compute_rdf_aggregates_{args.agg_size}.txt", "w") as f:
        f.write("Command: " + " ".join(sys.argv) + "\n")
 
     compute_rdfs(

From 437acaf17aeb8247de8d7b61747c5dafe958419b Mon Sep 17 00:00:00 2001
From: Henrique Musseli Cezar
Date: Tue, 21 May 2024 12:26:25 +0200
Subject: [PATCH 15/18] Add eccentricity threshold.
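The new flag only moves the sphere/non-sphere cutoff applied to the
eccentricity e = 1 - Imin/Imean computed earlier; for hypothetical moments:

    I1, I2, I3 = 10.0, 9.7, 9.0
    e = 1.0 - min(I1, I2, I3) / ((I1 + I2 + I3) / 3.0)  # ~0.059
    print(e < 0.12)  # True  -> sphere at the default threshold
    print(e < 0.05)  # False -> oblate/prolate at a stricter cutoff
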
--- utils/compute_rdf_aggregates.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/utils/compute_rdf_aggregates.py b/utils/compute_rdf_aggregates.py index 482ebeb1..8439e640 100644 --- a/utils/compute_rdf_aggregates.py +++ b/utils/compute_rdf_aggregates.py @@ -70,6 +70,7 @@ def compute_rdfs( names_prefix, start_colorbar, width_colorbar, + eccentricity_threshold, fig_size=(10, 8), fig_size_cdf=(12,12), ): @@ -275,7 +276,7 @@ def compute_rdfs( # compute eccentricity e = 1.0 - np.min([I1, I2, I3]) / np.mean([I1, I2, I3]) eccentricities.append(e) - if e < 0.12: + if e < eccentricity_threshold: type_agg = type_map["sphere"] else: # I3 is the lowest value (biggest axis) @@ -697,6 +698,12 @@ def compute_rdfs( default=0.02, help="width of the colorbar (default = 0.02)", ) + parser.add_argument( + "--eccentricity-threshold", + type=float, + default=0.12, + help="eccentricity threshold to classify the aggregate as a sphere (default = 0.12)", + ) args = parser.parse_args() @@ -738,6 +745,7 @@ def compute_rdfs( args.names_prefix, args.start_colorbar, args.width_colorbar, + args.eccentricity_threshold, tuple(args.fig_size), tuple(args.fig_size_cdf), ) From 4ad4ef5c273fb15db6ac709603eb16b4f58126d0 Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Thu, 23 May 2024 11:44:29 +0200 Subject: [PATCH 16/18] Compute actual RDF for spherical aggregates. --- utils/compute_rdf_aggregates.py | 36 ++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/utils/compute_rdf_aggregates.py b/utils/compute_rdf_aggregates.py index 8439e640..435ed929 100644 --- a/utils/compute_rdf_aggregates.py +++ b/utils/compute_rdf_aggregates.py @@ -323,10 +323,25 @@ def compute_rdfs( ) cdfs[type_agg][i] += cdf * np.prod(box_vectors[:3]) / natoms_normalization - # Compute the RDF using numpy histogram - rdf, bin_edges = np.histogram( - rdist, bins=nbins, range=(0, rmax), density=False - ) + # Compute the RDF using numpy histogram (if sphere it's the actual RDF, not the perpendicular one) + if type_agg == type_map["sphere"]: + # Compute the distances between the target atoms and the center + distances_matrix = distances.distance_array( + cog, at_sel.positions, box=box_vectors, backend="OpenMP" + ) + + # Flatten the distances matrix to a 1D array + flattened_distances = distances_matrix.flatten() + + # Compute the RDF using numpy histogram + rdf, bin_edges = np.histogram( + flattened_distances, bins=nbins, range=(0, rmax), density=False + ) + else: + rdf, bin_edges = np.histogram( + rdist, bins=nbins, range=(0, rmax), density=False + ) + rdfs[type_agg][i] += rdf * np.prod(box_vectors[:3]) / natoms_normalization @@ -378,7 +393,6 @@ def compute_rdfs( np.savetxt(names_prefix + f"{agg_size}_binsL.txt", ycenters) # RDF - shell_volumes_rdf = np.pi * (bin_edges[1:] ** 2 - bin_edges[:-1] ** 2) * 2.0 * lmax # Prepare to plot mid_bin = (bin_edges[:-1] + bin_edges[1:]) / 2.0 np.savetxt(names_prefix + f"gperp_{agg_size}_bins.txt", mid_bin) @@ -463,16 +477,24 @@ def compute_rdfs( # radial # Normalize the RDF by dividing by the shell volume and number of snapshots and plot + if agg_type == type_map["sphere"]: + shell_volumes_rdf = (4.0 / 3.0) * np.pi * (bin_edges[1:] ** 3 - bin_edges[:-1] ** 3) + else: + shell_volumes_rdf = np.pi * (bin_edges[1:] ** 2 - bin_edges[:-1] ** 2) * 2.0 * lmax + for i, sel in enumerate(selections): rdfs[agg_type][i] = rdfs[agg_type][i] / (shell_volumes_rdf * count_agg_types[agg_type]) plt.plot(mid_bin / 10.0, rdfs[agg_type][i], linewidth=3, 
label=name_selections[i]) - np.savetxt(names_prefix + f"gperp_{agg_size}_" + sel.replace(" ", "_") + ".txt", rdfs[agg_type][i]) + np.savetxt(names_prefix + f"gperp_{agg_size}_" + sel.replace(" ", "_") + rev_type_map[agg_type] + ".txt", rdfs[agg_type][i]) plt.legend(frameon=False) plt.xlabel(r"r (nm)") - plt.ylabel(r"g$_\perp$(r)") + if agg_type == type_map["sphere"]: + plt.ylabel(r"g(r)") + else: + plt.ylabel(r"g$_\perp$(r)") plt.savefig(names_prefix + f"radial_RDFs_{agg_size}_{rev_type_map[agg_type]}.pdf", bbox_inches="tight") plt.show() From 956c54cefe50b68f16703dbd1e667c785453619c Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Thu, 5 Sep 2024 14:01:50 +0200 Subject: [PATCH 17/18] New cluster analysis scripts. --- utils/compute_dimension_core_agg.py | 213 ++++++++++++++++++ utils/compute_rdf_aggregates.py | 26 ++- utils/order_parameter_agg.py | 328 ++++++++++++++++++++++++++++ 3 files changed, 561 insertions(+), 6 deletions(-) create mode 100644 utils/compute_dimension_core_agg.py create mode 100644 utils/order_parameter_agg.py diff --git a/utils/compute_dimension_core_agg.py b/utils/compute_dimension_core_agg.py new file mode 100644 index 00000000..4b67a44f --- /dev/null +++ b/utils/compute_dimension_core_agg.py @@ -0,0 +1,213 @@ +import MDAnalysis as mda +from MDAnalysis.lib import distances, mdamath, transformations +from glob import glob +import argparse +from tqdm import tqdm +import os +import numpy as np +import tomli + +def process_topology(topol_path): + with open(topol_path, "rb") as f: + topol = tomli.load(f) + + if "include" in topol["system"]: + for file in topol["system"]["include"]: + path = os.path.join(os.path.dirname(topol_path), file) + with open(path, "rb") as f: + itps = tomli.load(f) + for mol, itp in itps.items(): + topol[mol] = itp + + return topol + +def parse_masses(fname): + if not fname: + return None + + masses = {} + with open(fname, "r") as f: + for line in f: + masses[line.split()[0]] = [float(x) for x in line.split()[1:]] + return masses + + + +def compute_order_parameter(topol, pdbdir, agg_size, core_selection, masses, center, eccentricity_threshold=0.14): + type_map = {"sphere": 0, "oblate": 1, "prolate": 2} + agg_types = [] + I1 = {0: [], 1: [], 2: []} + I2 = {0: [], 1: [], 2: []} + I3 = {0: [], 1: [], 2: []} + + nsnaps = 0 + for snapshot in tqdm(glob(os.path.join(pdbdir, "*.pdb"))): + u = mda.Universe(snapshot) + + # based on topology and residues determine size of aggregates + resids = {} + for residue in u.residues: + nummols = int( + residue.atoms.positions.shape[0] / topol[residue.resname]["atomnum"] + ) + if residue.resid in resids: + resids[residue.resid] += nummols + else: + resids[residue.resid] = nummols + + # check if we can find an aggregate of the given size + agg_resids = [] + for k, v in resids.items(): + if v == agg_size: + agg_resids.append(k) + + # if size was not found, go to next pdb + if len(agg_resids) == 0: + continue + + # for each aggregate of given size, get the order parameter + for ididx, resid in enumerate(agg_resids): + if ididx > 0: + u = mda.Universe(snapshot) + nsnaps += 1 + box_vectors = u.dimensions + agg_sel = u.select_atoms(f"resid {resid}") + + if not center: + center_coord = u.select_atoms(f"resid {resid}").positions + else: + center_coord = u.select_atoms(f"resid {resid} and " + center).positions + + # get the geometric center using the algorithm: + # https://en.wikipedia.org/wiki/Center_of_mass#Systems_with_periodic_boundary_conditions + tetha = (center_coord / box_vectors[:3]) * (2.0 * np.pi) + xi 
= np.cos(tetha) + zeta = np.sin(tetha) + + xi_bar = np.mean(xi, axis=0) + zeta_bar = np.mean(zeta, axis=0) + tetha_bar = np.arctan2(-zeta_bar, -xi_bar) + np.pi + + cog = (box_vectors[:3] * tetha_bar) / (2.0 * np.pi) + + box_center = box_vectors[:3] / 2.0 + u.atoms.translate(box_center - cog) + u.atoms.wrap(compound="atoms") + cog = box_center + + # set the masses + i = 0 + mass_array = [] + while i < len(agg_sel.resnames): + name = agg_sel.resnames[i] + for bmass in masses[name]: + mass_array.append(bmass) + i += len(masses[name]) + + mass_array = np.array(mass_array) + agg_sel.masses = mass_array + + # get moment of inertia and principal axis + I = agg_sel.moment_of_inertia() + UT = agg_sel.principal_axes() # this is already transposed + + # diagonalizes I + Lambda = UT.dot(I.dot(UT.T)) + + Ia = Lambda[0][0] + Ib = Lambda[1][1] + Ic = Lambda[2][2] + + # detect the aggregate type + type_agg = None + # compute eccentricity + e = 1.0 - np.min([Ia, Ib, Ic]) / np.mean([Ia, Ib, Ic]) + if e < eccentricity_threshold: + type_agg = type_map["sphere"] + else: + # Ic is the lowest value (biggest axis) + if np.abs(Ia - Ib) > np.abs(Ib - Ic): + continue # if oblate, skip + # type_agg = type_map["oblate"] + else: + type_agg = type_map["prolate"] + agg_types.append(type_agg) + + # gets symmetry axis + if type_agg == type_map["oblate"]: + pa = UT[0] + else: + pa = UT[2] + + # get the angle between the principal axis and the z-axis and rotate universe + # this does not take into consideration prolate or oblate micelles + angle = np.degrees(mdamath.angle(pa, [0,0,1])) + ax = transformations.rotaxis(pa, [0,0,1]) + u.atoms.rotateby(angle, ax, point=cog) + + # select the core and get dimensions + core = u.select_atoms(f"resid {resid} and {core_selection}") + total_mass = np.sum(core.masses) + + # get moment of inertia and principal axis + I = core.moment_of_inertia() + UT = core.principal_axes() # this is already transposed + + # diagonalizes I + Lambda = UT.dot(I.dot(UT.T)) + + I1[type_agg].append(Lambda[0][0]) + I2[type_agg].append(Lambda[1][1]) + I3[type_agg].append(Lambda[2][2]) + + # for each aggregate type, compute the dimensions + for k, v in type_map.items(): + if len(I1[v]) == 0: + continue + + Ia = np.mean(I1[v]) + Ib = np.mean(I2[v]) + Ic = np.mean(I3[v]) + + a = np.sqrt((5.0 / (2.0 * total_mass)) * (Ib + Ic - Ia)) + b = np.sqrt((5.0 / (2.0 * total_mass)) * (Ia + Ic - Ib)) + c = np.sqrt((5.0 / (2.0 * total_mass)) * (Ia + Ib - Ic)) + + print(f"Aggregate type: {k}") + print(f"Dimensions: a = {a:.4f} A, b = {b:.4f} A, c = {c:.4f} A") + + +if __name__ == "__main__": + description = "Given the directory with all the .pdb generated by aggregates.py compute the dimensions for the core of the aggregates." + parser = argparse.ArgumentParser(description=description) + parser.add_argument("topol", type=str, help=".toml topology files") + parser.add_argument("pdb_dir", type=str, help="directory containing the .pdb") + parser.add_argument( + "agg_size", type=int, help="aggregate size for which the RDFs will be computed" + ) + parser.add_argument( + "core_selection", + type=str, + help='Selection for the core beads. 
Example: "name C2 TC5"', + ) + parser.add_argument( + "masses", + type=str, + default=None, + help="file containing residue name (molecule name) in first column and a column with the mass for each bead", + ) + parser.add_argument( + "--eccentricity-threshold", + type=float, + default=0.14, + help="eccentricity threshold to determine the type of aggregate", + ) + parser.add_argument( + "--center", + type=str, + default=False, + help="selection for the center (if not used the center is the centroid)", + ) + args = parser.parse_args() + topol = process_topology(args.topol) + compute_order_parameter(topol, args.pdb_dir, args.agg_size, args.core_selection, parse_masses(args.masses), args.center, args.eccentricity_threshold) \ No newline at end of file diff --git a/utils/compute_rdf_aggregates.py b/utils/compute_rdf_aggregates.py index 435ed929..2e375bd0 100644 --- a/utils/compute_rdf_aggregates.py +++ b/utils/compute_rdf_aggregates.py @@ -135,10 +135,11 @@ def compute_rdfs( if len(agg_resids) == 0: continue - snapshots.append(os.path.basename(snapshot)) - # for each aggregate of given size, accumulate the RDFs - for resid in agg_resids: + for ididx, resid in enumerate(agg_resids): + if ididx > 0: + u = mda.Universe(snapshot) + snapshots.append(os.path.basename(snapshot)) nsnaps += 1 box_vectors = u.dimensions @@ -167,7 +168,7 @@ def compute_rdfs( u.atoms.wrap(compound="atoms") cog = box_center - if save_agg_only: + if save_agg_only and not compute_cdfs: agg_sel = u.select_atoms(f"resid {resid}") dirname = f"./agg_only_{agg_size}" @@ -176,7 +177,7 @@ def compute_rdfs( os.mkdir(dirname) agg_sel.atoms.write( - os.path.join(dirname, f"centered_{os.path.basename(snapshot)}") + os.path.join(dirname, f"centered_{resid}_{os.path.basename(snapshot)}") ) if save_centered: @@ -186,7 +187,7 @@ def compute_rdfs( os.mkdir(dirname) u.atoms.write( - os.path.join(dirname, f"centered_{os.path.basename(snapshot)}") + os.path.join(dirname, f"centered_{resid}_{os.path.basename(snapshot)}") ) # set the masses @@ -298,6 +299,16 @@ def compute_rdfs( ax = transformations.rotaxis(pa, [0,0,1]) u.atoms.rotateby(angle, ax, point=cog) + if save_agg_only: + dirname = f"./agg_only_{agg_size}" + + if not os.path.exists(dirname): + os.mkdir(dirname) + + agg_sel.atoms.write( + os.path.join(dirname, f"centered_{resid}_{os.path.basename(snapshot)}") + ) + # create selections for which CDFs will be computed for i, sel_string in enumerate(selections): if sel_string.strip().lower() in ["name w", "type w", "name na", "type na", "name cl", "type cl"]: @@ -369,6 +380,9 @@ def compute_rdfs( rdfs[i] += rdf * np.prod(box_vectors[:3]) / natoms_normalization + if len(snapshots) == 0: + raise ValueError(f"No aggregates of size {agg_size} were found in the directory") + plt.rcParams["figure.figsize"] = fig_size plt.rcParams["font.size"] = 22 diff --git a/utils/order_parameter_agg.py b/utils/order_parameter_agg.py new file mode 100644 index 00000000..f0dc55e3 --- /dev/null +++ b/utils/order_parameter_agg.py @@ -0,0 +1,328 @@ +import MDAnalysis as mda +from MDAnalysis.lib import distances, mdamath, transformations +from glob import glob +import argparse +from tqdm import tqdm +import os +import numpy as np +import tomli + +def process_topology(topol_path): + with open(topol_path, "rb") as f: + topol = tomli.load(f) + + if "include" in topol["system"]: + for file in topol["system"]["include"]: + path = os.path.join(os.path.dirname(topol_path), file) + with open(path, "rb") as f: + itps = tomli.load(f) + for mol, itp in itps.items(): + topol[mol] 
= itp + + return topol + +def parse_masses(fname): + if not fname: + return None + + masses = {} + with open(fname, "r") as f: + for line in f: + masses[line.split()[0]] = [float(x) for x in line.split()[1:]] + return masses + + +def get_index_beads(topol, bead1, bead2): + for i, atom in enumerate(topol["atoms"]): + if atom[1] == bead1: + bead1_index = i + elif atom[1] == bead2: + bead2_index = i + return bead1_index, bead2_index + + +def compute_order_parameter(topol, pdbdir, agg_size, bead1, bead2, masses, cylinder_length, center, eccentricity_threshold=0.14): + type_map = {"sphere": 0, "oblate": 1, "prolate": 2} + agg_types = [] + all_order_parameters = [] + all_cosines = [] + all_order_parameters_cylinder = [] + all_cosines_cylinder = [] + all_order_parameters_cap = [] + all_cosines_cap = [] + all_distances = [] + distances_sphere = [] + distances_prolate = [] + all_angles = [] + angles_sphere = [] + angles_prolate = [] + angles_cap = [] + + nsnaps = 0 + for snapshot in tqdm(glob(os.path.join(pdbdir, "*.pdb"))): + u = mda.Universe(snapshot) + + # based on topology and residues determine size of aggregates + resids = {} + for residue in u.residues: + nummols = int( + residue.atoms.positions.shape[0] / topol[residue.resname]["atomnum"] + ) + if residue.resid in resids: + resids[residue.resid] += nummols + else: + resids[residue.resid] = nummols + + # check if we can find an aggregate of the given size + agg_resids = [] + for k, v in resids.items(): + if v == agg_size: + agg_resids.append(k) + + # if size was not found, go to next pdb + if len(agg_resids) == 0: + continue + + # for each aggregate of given size, get the order parameter + for ididx, resid in enumerate(agg_resids): + if ididx > 0: + u = mda.Universe(snapshot) + nsnaps += 1 + box_vectors = u.dimensions + agg_sel = u.select_atoms(f"resid {resid}") + + if not center: + center_coord = u.select_atoms(f"resid {resid}").positions + else: + center_coord = u.select_atoms(f"resid {resid} and " + center).positions + + # get the geometric center using the algorithm: + # https://en.wikipedia.org/wiki/Center_of_mass#Systems_with_periodic_boundary_conditions + tetha = (center_coord / box_vectors[:3]) * (2.0 * np.pi) + xi = np.cos(tetha) + zeta = np.sin(tetha) + + xi_bar = np.mean(xi, axis=0) + zeta_bar = np.mean(zeta, axis=0) + tetha_bar = np.arctan2(-zeta_bar, -xi_bar) + np.pi + + cog = (box_vectors[:3] * tetha_bar) / (2.0 * np.pi) + + box_center = box_vectors[:3] / 2.0 + u.atoms.translate(box_center - cog) + u.atoms.wrap(compound="atoms") + cog = box_center + + # set the masses + i = 0 + mass_array = [] + while i < len(agg_sel.resnames): + name = agg_sel.resnames[i] + for bmass in masses[name]: + mass_array.append(bmass) + i += len(masses[name]) + + mass_array = np.array(mass_array) + agg_sel.masses = mass_array + total_mass = np.sum(mass_array) + + # get moment of inertia and principal axis + I = agg_sel.moment_of_inertia() + UT = agg_sel.principal_axes() # this is already transposed + + # diagonalizes I + Lambda = UT.dot(I.dot(UT.T)) + + I1 = Lambda[0][0] + I2 = Lambda[1][1] + I3 = Lambda[2][2] + + # detect the aggregate type + type_agg = None + # compute eccentricity + e = 1.0 - np.min([I1, I2, I3]) / np.mean([I1, I2, I3]) + if e < eccentricity_threshold: + type_agg = type_map["sphere"] + else: + # I3 is the lowest value (biggest axis) + if np.abs(I1 - I2) > np.abs(I2 - I3): + continue # if oblate, skip + # type_agg = type_map["oblate"] + else: + type_agg = type_map["prolate"] + agg_types.append(type_agg) + + # gets symmetry axis + if 
type_agg == type_map["oblate"]: + pa = UT[0] + else: + pa = UT[2] + + # get the angle between the principal axis and the z-axis and rotate universe + # this does not take into consideration prolate or oblate micelles + angle = np.degrees(mdamath.angle(pa, [0,0,1])) + ax = transformations.rotaxis(pa, [0,0,1]) + u.atoms.rotateby(angle, ax, point=cog) + + # iterate over all molecules computing the order parameter + molecule_order_parameters = [] + simple_cosine = [] + cylinder_order_parameters = [] + cylinder_cosine = [] + cap_order_parameters = [] + cap_cosine = [] + for mol in agg_sel.residues: + nmols_residue = int(mol.atoms.positions.shape[0] / topol[mol.resname]["atomnum"]) + for i in range(nmols_residue): + mol_at_positions = mol.atoms.positions[i * topol[mol.resname]["atomnum"] : (i + 1) * topol[mol.resname]["atomnum"]] - cog + # get positions of bead only + bead1_index, bead2_index = get_index_beads(topol[mol.resname], bead1, bead2) + orient_vec = mol_at_positions[bead2_index] - mol_at_positions[bead1_index] + all_distances.append(np.linalg.norm(orient_vec)) + + if type_agg == type_map["sphere"]: + distances_sphere.append(np.linalg.norm(orient_vec)) + cm_vec = mol_at_positions[bead1_index] + + # compute order parameter + cosine = np.dot(cm_vec, orient_vec) / (np.linalg.norm(cm_vec) * np.linalg.norm(orient_vec)) + molecule_order_parameters.append(0.5 * (3.0 * cosine * cosine - 1.0)) + simple_cosine.append(cosine) + angles_sphere.append(np.arccos(cosine) * 180.0 / np.pi) + + elif type_agg == type_map["prolate"]: + distances_prolate.append(np.linalg.norm(orient_vec)) + # find if the molecule is in the cylindrical or cap regions + if mol_at_positions[bead1_index][2] > cylinder_length: # top cap + cm_vec = mol_at_positions[bead1_index] + cm_vec[2] -= cylinder_length + + cosine = np.dot(cm_vec, orient_vec) / (np.linalg.norm(cm_vec) * np.linalg.norm(orient_vec)) + cap_order_parameters.append(0.5 * (3.0 * cosine * cosine - 1.0)) + cap_cosine.append(cosine) + angles_cap.append(np.arccos(cosine) * 180.0 / np.pi) + elif mol_at_positions[bead1_index][2] < -cylinder_length: # bottom cap + cm_vec = mol_at_positions[bead1_index] + cm_vec[2] += cylinder_length + + cosine = np.dot(cm_vec, orient_vec) / (np.linalg.norm(cm_vec) * np.linalg.norm(orient_vec)) + cap_order_parameters.append(0.5 * (3.0 * cosine * cosine - 1.0)) + cap_cosine.append(cosine) + angles_cap.append(np.arccos(cosine) * 180.0 / np.pi) + else: # cylinder part + cm_vec = mol_at_positions[bead1_index] + cm_vec[2] = 0.0 + + cosine = np.dot(cm_vec, orient_vec) / (np.linalg.norm(cm_vec) * np.linalg.norm(orient_vec)) + cylinder_order_parameters.append(0.5 * (3.0 * cosine * cosine - 1.0)) + cylinder_cosine.append(cosine) + angles_prolate.append(np.arccos(cosine) * 180.0 / np.pi) + + + all_angles.append(np.arccos(cosine) * 180.0 / np.pi) + + if type_agg == type_map["sphere"]: + all_order_parameters.append(np.mean(molecule_order_parameters)) + all_cosines.append(np.mean(simple_cosine)) + elif type_agg == type_map["prolate"]: + all_order_parameters_cylinder.append(np.mean(cylinder_order_parameters)) + all_cosines_cylinder.append(np.mean(cylinder_cosine)) + all_order_parameters_cap.append(np.mean(cap_order_parameters)) + all_cosines_cap.append(np.mean(cap_cosine)) + + print(f"Aggregate size: {agg_size}") + print(f"Eccentricity threshold: {eccentricity_threshold}") + print(f"Prolate cylinder half length: {cylinder_length}\n") + + print(f"Percentage of spheres: {agg_types.count(type_map['sphere']) / nsnaps}") + print(f"Percentage of oblates: 
{agg_types.count(type_map['oblate']) / nsnaps}") + print(f"Percentage of prolates: {agg_types.count(type_map['prolate']) / nsnaps}\n") + + print(f"Mean distance between beads: {np.mean(all_distances)} ± {np.std(all_distances)}") + print(f"Mean distance between beads (sphere): {np.mean(distances_sphere)} ± {np.std(distances_sphere)}") + print(f"Mean distance between beads (prolate): {np.mean(distances_prolate)} ± {np.std(distances_prolate)}\n") + + print(f"Mean order parameter (sphere): {np.mean(all_order_parameters)} ± {np.std(all_order_parameters)}") + print(f"Mean cosine (sphere): {np.mean(all_cosines)} ± {np.std(all_cosines)}") + print(f"Mean order parameter (cylinder): {np.mean(all_order_parameters_cylinder)} ± {np.std(all_order_parameters_cylinder)}") + print(f"Mean cosine (cylinder): {np.mean(all_cosines_cylinder)} ± {np.std(all_cosines_cylinder)}") + print(f"Mean order parameter (cap): {np.mean(all_order_parameters_cap)} ± {np.std(all_order_parameters_cap)}") + print(f"Mean cosine (cap): {np.mean(all_cosines_cap)} ± {np.std(all_cosines_cap)}\n") + + print(f"Mean angle (all): {np.mean(all_angles)} ± {np.std(all_angles)}") + + import matplotlib.pyplot as plt + plt.rcParams["figure.figsize"] = (10, 8) + plt.rcParams["font.size"] = 22 + + # Plotting distributions + angles = [all_angles, angles_cap, angles_prolate, angles_sphere] + labels = ['All Angles', 'Cap Angles', 'Prolate Angles', 'Sphere Angles'] + colors = ['C0', 'C1', 'C2', 'C3'] + counts = [len(angle_list) for angle_list in angles] + sorted_angles = [angle_list for _, angle_list in sorted(zip(counts, angles), reverse=True)] + sorted_labels = [label for _, label in sorted(zip(counts, labels), reverse=True)] + sorted_colors = [color for _, color in sorted(zip(counts, colors), reverse=True)] + + for angle_list, label, color in zip(sorted_angles, sorted_labels, sorted_colors): + plt.hist(angle_list, bins=50, alpha=0.5, label=label, color=color) + + # Adding labels and legend + plt.xlabel(r'Angle ($\degree$)') + plt.ylabel('Frequency') + plt.legend() + + # Displaying the plot + plt.savefig(f"distr_angles_surface_{agg_size}.pdf", bbox_inches='tight') + plt.show() + + +if __name__ == "__main__": + description = "Given the directory with all the .pdb generated by aggregates.py compute the order parameters for each aggregate type" + parser = argparse.ArgumentParser(description=description) + parser.add_argument("topol", type=str, help=".toml topology files") + parser.add_argument("pdb_dir", type=str, help="directory containing the .pdb") + parser.add_argument( + "agg_size", type=int, help="aggregate size for which the RDFs will be computed" + ) + parser.add_argument( + "bead1", + type=str, + help='name of first bead in the order parameter calculation. Example: "TN2a"', + ) + parser.add_argument( + "bead2", + type=str, + help='name of second bead in the order parameter calculation. 
Example: "TP1"', + ) + parser.add_argument( + "masses", + type=str, + default=None, + help="file containing residue name (molecule name) in first column and a column with the mass for each bead", + ) + parser.add_argument( + "--cylinder-length", + type=float, + default=0.0, + help="HALF length of the cylinder (not considering the cap) to compute the order parameter", + ) + parser.add_argument( + "--eccentricity-threshold", + type=float, + default=0.14, + help="eccentricity threshold to determine the type of aggregate", + ) + parser.add_argument( + "--center", + type=str, + default=False, + help="selection for the center (if not used the center is the centroid)", + ) + + args = parser.parse_args() + + topol = process_topology(args.topol) + + compute_order_parameter(topol, args.pdb_dir, args.agg_size, args.bead1, args.bead2, parse_masses(args.masses), args.cylinder_length, args.center, args.eccentricity_threshold) + From b136a28b0fe3f1b2b1f88320b96164b8eedeca0e Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Thu, 5 Sep 2024 14:31:26 +0200 Subject: [PATCH 18/18] Alllow mpirun to oversubscribe to avoid CI errors. --- pytest-mpi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pytest-mpi b/pytest-mpi index b661bf84..97a817ce 100755 --- a/pytest-mpi +++ b/pytest-mpi @@ -82,7 +82,7 @@ else fi if [ "${ORDEROUTPUT}" -eq 1 ]; then - mpirun -n ${NPROCS} --output-filename outtest pytest ${PYTEST_ARGS} --only-mpi >/dev/null + mpirun -n ${NPROCS} --oversubscribe --output-filename outtest pytest ${PYTEST_ARGS} --only-mpi >/dev/null exit_code=$? for ((RANK=0; RANK<${NPROCS}; RANK++)); do RANKFILE="outtest/1/rank.${RANK}/stdout" @@ -94,10 +94,10 @@ else done rm -r outtest elif [ "${UNMUTE}" -eq -1 ]; then - mpirun -n ${NPROCS} pytest ${PYTEST_ARGS} --only-mpi + mpirun -n ${NPROCS} --oversubscribe pytest ${PYTEST_ARGS} --only-mpi exit_code=$? else - mpirun -n ${NPROCS} utils/mute_all_ranks_except.sh ${UNMUTE} pytest ${PYTEST_ARGS} --only-mpi + mpirun -n ${NPROCS} --oversubscribe utils/mute_all_ranks_except.sh ${UNMUTE} pytest ${PYTEST_ARGS} --only-mpi exit_code=$? fi fi
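For reference, the per-molecule quantity averaged by
order_parameter_agg.py above is the second Legendre polynomial
P2(cos theta) of the angle between the molecular orientation vector
(bead1 -> bead2) and the local radial vector cm_vec (aggregate center
-> bead1; for prolate aggregates the z component of cm_vec is zeroed in
the cylindrical body and shifted by the half length in the caps). A
minimal, self-contained sketch of that kernel, with an invented
function name and the two vectors assumed to be already expressed in
the centered, rotated aggregate frame as the script does:

import numpy as np

def p2_order_parameter(cm_vec, orient_vec):
    # Cosine of the angle between the local radial direction and the
    # molecular orientation vector.
    cosine = np.dot(cm_vec, orient_vec) / (
        np.linalg.norm(cm_vec) * np.linalg.norm(orient_vec)
    )
    # P2(cos theta) = (3 cos^2 theta - 1) / 2:
    # +1.0 for a perfectly radial molecule, -0.5 for a tangential one,
    # and 0 on average for an isotropic distribution.
    return 0.5 * (3.0 * cosine * cosine - 1.0)

print(p2_order_parameter(np.array([0.0, 0.0, 3.0]),
                         np.array([0.0, 0.0, 1.5])))  # 1.0, radial

Finally, the --oversubscribe flag added to the three mpirun invocations
in pytest-mpi is the standard Open MPI option that allows launching
more ranks than there are available slots, so the MPI test suite no
longer aborts on CI runners that have fewer cores than the requested
${NPROCS}.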