From 0f0d8425b96ce4bc92377b6ee135d3f1e6d0b2b3 Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Tue, 7 Nov 2023 14:12:58 +0100 Subject: [PATCH 01/18] Fix compute_rdf_aggregates without names --- utils/compute_rdf_aggregates.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/utils/compute_rdf_aggregates.py b/utils/compute_rdf_aggregates.py index 53e37ced..12b4b5ee 100644 --- a/utils/compute_rdf_aggregates.py +++ b/utils/compute_rdf_aggregates.py @@ -394,6 +394,8 @@ def compute_rdfs( if args.name_selections and (len(args.selections) != len(args.name_selections)): raise AssertionError("the number of selections must be equal to the number of names") + elif not args.name_selections: + args.name_selections = args.selections topol = process_topology(args.topol) From 21afffb6fd57d54e8ba42ae0b07e69b893e9f3c1 Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Fri, 29 Dec 2023 09:51:01 +0100 Subject: [PATCH 02/18] Fix pddf --- utils/compute_rdf_aggregates.py | 47 ++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/utils/compute_rdf_aggregates.py b/utils/compute_rdf_aggregates.py index 12b4b5ee..0eeaa4ab 100644 --- a/utils/compute_rdf_aggregates.py +++ b/utils/compute_rdf_aggregates.py @@ -1,5 +1,6 @@ import MDAnalysis as mda from MDAnalysis.lib import distances +from scipy.spatial.distance import squareform import numpy as np import matplotlib.pyplot as plt import os @@ -184,13 +185,41 @@ def compute_rdfs( result=cond_distmat, backend="OpenMP", ) + # sqdistmat = squareform(cond_distmat) - pddf_i, pddf_edges = np.histogram( - cond_distmat, - bins=nbins, - range=(0, rmax), - density=False, - ) + # load the toml file with the form factors + with open(compute_pddf, "rb") as f: + form_factors = tomli.load(f) + + # Create a 2D array of form factors for all pairs of atoms + form_factors_array = np.array([form_factors[atom.residue.resname][atom.name][0] for atom in agg_sel]) + + # Compute the weights for all pairs of atoms + weights_matrix = np.outer(form_factors_array, form_factors_array) + + # Get the upper triangular part of the weights matrix + weights = weights_matrix[np.triu_indices(n, k=1)] + + # Compute the histogram + pddf_i, pddf_edges = np.histogram(cond_distmat, bins=nbins, range=(0, rmax), weights=weights, density=False) + + # deltaR = rmax / nbins + # pddf_i = np.zeros((nbins,), dtype=np.float64) + # pddf_dists = np.linspace(deltaR/2.0, rmax - deltaR/2.0, nbins) + # for i in range(n): + # for j in range(i + 1, n): + # if sqdistmat[i][j] <= rmax: + # bin_distance = np.floor(sqdistmat[i][j]/deltaR) + + # pddf_i[int(bin_distance)] += form_factors[agg_sel[i].residue.resname][agg_sel[i].name][0] * form_factors[agg_sel[j].residue.resname][agg_sel[j].name][0] + + + # pddf_i, pddf_edges = np.histogram( + # cond_distmat, + # bins=nbins, + # range=(0, rmax), + # density=False, + # ) pddf += pddf_i @@ -382,9 +411,9 @@ def compute_rdfs( ) parser.add_argument( "--compute-pddf", - action="store_true", - default=False, - help="compute the pair distance distribution function (PDDF)", + type=str, + default=None, + help="compute the pair distance distribution function (PDDF) - needs a form factor .toml file", ) args = parser.parse_args() From f8fc206d65721a327b4f17d01ddbdc567ec6080f Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Fri, 29 Dec 2023 11:20:14 +0100 Subject: [PATCH 03/18] Changed PDDF to be computed using PLUMED parameters. 
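For reference, the parameters file read here follows the PLUMED style: one
PARAMETERS=... entry per bead, in the same order the beads appear in the
trajectory (values are looked up by atom index), and only the first
comma-separated value is kept. A minimal sketch with a hypothetical file;
note that line.split("=")[1] assumes exactly one "=" per line:

    # params.dat (hypothetical):
    #   PARAMETERS=9.45,2.31
    #   PARAMETERS=8.10,1.77
    form_factors = parse_parametersfile("params.dat")
    # -> [9.45, 8.1]
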
--- utils/compute_rdf_aggregates.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/utils/compute_rdf_aggregates.py b/utils/compute_rdf_aggregates.py index 0eeaa4ab..b07f66b6 100644 --- a/utils/compute_rdf_aggregates.py +++ b/utils/compute_rdf_aggregates.py @@ -37,6 +37,16 @@ def parse_masses(fname): return masses +def parse_parametersfile(fname): + params = [] + with open(fname, "r") as f: + for line in f: + if "PARAMETERS" in line: + right_line = line.split("=")[1].strip() + params.append(float(right_line.split(",")[0])) # get only the first parameter even for SAXS + return params + + def compute_rdfs( topol, pdbdir, @@ -64,6 +74,9 @@ def compute_rdfs( Rg = [] if compute_pddf: pddf = np.zeros(nbins) + + # read the form factors / scattering lengths + form_factors = parse_parametersfile(compute_pddf) nsnaps = 0 for snapshot in tqdm(glob(os.path.join(pdbdir, "*.pdb"))): @@ -187,12 +200,8 @@ def compute_rdfs( ) # sqdistmat = squareform(cond_distmat) - # load the toml file with the form factors - with open(compute_pddf, "rb") as f: - form_factors = tomli.load(f) - # Create a 2D array of form factors for all pairs of atoms - form_factors_array = np.array([form_factors[atom.residue.resname][atom.name][0] for atom in agg_sel]) + form_factors_array = np.array([form_factors[atom.index] for atom in agg_sel]) # Compute the weights for all pairs of atoms weights_matrix = np.outer(form_factors_array, form_factors_array) From 91d887c6e45171a2f9eabeacd93dc168efce00a4 Mon Sep 17 00:00:00 2001 From: Manuel Carrer Date: Fri, 2 Feb 2024 23:01:13 +0100 Subject: [PATCH 04/18] Add name selection in center_group.py --- utils/center_group.py | 190 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 157 insertions(+), 33 deletions(-) diff --git a/utils/center_group.py b/utils/center_group.py index b3cf7555..60479201 100644 --- a/utils/center_group.py +++ b/utils/center_group.py @@ -1,8 +1,10 @@ -import os import argparse +import os +import re +import sys + import h5py import numpy as np -import re # based on https://stackoverflow.com/a/6512463/3254658 @@ -22,6 +24,19 @@ def parse_bead_list(string): ) return list(range(start, end + 1)) +def parse_name_list(selection, h5md_file): + f_in = h5py.File(h5md_file, "r") + names = f_in["parameters/vmd_structure/name"][:] + species = f_in["particles/all/species"][:] + name_to_species = {n: s for s, n in enumerate(names)} + + name_list = np.empty(0, dtype=np.int32) + for name in selection: + name_list = np.append(name_list, np.where(species == name_to_species[np.bytes_(name)])) + + f_in.close() + return name_list + def get_centers(positions, box, nrefs=3): centers = np.empty((0, positions.shape[2])) @@ -54,23 +69,7 @@ def get_centers(positions, box, nrefs=3): return centers -def center_trajectory( - h5md_file, bead_list, nrefbeads, overwrite=False, out_path=None, center_last=False -): - if out_path is None: - out_path = os.path.join( - os.path.abspath(os.path.dirname(h5md_file)), - os.path.splitext(os.path.split(h5md_file)[-1])[0] - + "_new" - + os.path.splitext(os.path.split(h5md_file)[-1])[1], - ) - if os.path.exists(out_path) and not overwrite: - error_str = ( - f"The specified output file {out_path} already exists. " - f'use overwrite=True ("-f" flag) to overwrite.' 
- ) - raise FileExistsError(error_str) - +def center_trajectory_mic(h5md_file, bead_list, nrefbeads, out_path, center_last=False): f_in = h5py.File(h5md_file, "r") f_out = h5py.File(out_path, "w") @@ -81,7 +80,6 @@ def center_trajectory( f_in.copy(k, f_out) box_size = f_in["particles/all/box/edges/value"][:] - beads_pos = f_in["particles/all/position/value"][:][:, bead_list, :] centers = get_centers(beads_pos, box_size, nrefbeads) @@ -108,6 +106,68 @@ def center_trajectory( f_in.close() f_out.close() +def center_of_mass(pos, n, box): + p_mapped = 2 * np.pi * pos / box + cos_p_mapped = np.cos(p_mapped) + sin_p_mapped = np.sin(p_mapped) + + cos_average = np.sum(cos_p_mapped) / n + sin_average = np.sum(sin_p_mapped) / n + + theta = np.arctan2(-sin_average, -cos_average) + np.pi + return box * theta / (2 * np.pi) + + +def get_centers_com(positions, box_size, axis): + frames = positions.shape[0] + n = positions.shape[1] + centers = np.zeros((frames, 3)) + for frame in range(frames): + centers[frame, axis] = center_of_mass(positions[frame, :, axis], n, box_size[frame, axis]) + return centers + + +def center_trajectory_com(h5md_file, bead_list, axis, out_path): + f_in = h5py.File(h5md_file, "r") + f_out = h5py.File(out_path, "w") + + for k, v in f_in.attrs.items(): + f_out.attrs[k] = v + + for k in f_in.keys(): + f_in.copy(k, f_out) + f_in.close() + + box_size = f_out["particles/all/box/edges/value"][:] + beads_pos = f_out["particles/all/position/value"][:][:, bead_list, :] + + box_diag = np.diag(box_size[0]) + for frame in range(1, box_size.shape[0]): + box_diag = np.vstack((box_diag, np.diag(box_size[frame]))) + + centers = get_centers_com(beads_pos, box_diag, axis) + + mask = np.eye(1, 3, k=axis) + box_translate = 0.5 * mask * box_diag + + translate = box_translate - centers + tpos = f_out["particles/all/position/value"] + np.repeat( + translate[:, np.newaxis, :], + f_out["particles/all/position/value"].shape[1], + axis=1, + ) + + tpos = np.mod( + tpos[:, :, :], np.repeat(box_diag[:, np.newaxis, :], tpos.shape[1], axis=1) + ) + + f_out["particles/all/position/value"][:] = tpos - np.repeat( + box_translate[:, np.newaxis, :], + f_out["particles/all/position/value"].shape[1], + axis=1, + ) + f_out.close() + if __name__ == "__main__": description = ( @@ -120,9 +180,35 @@ def center_trajectory( "--beads", type=parse_bead_list, nargs="+", - required=True, + default=None, help="bead list to center (e.g.: 1-100 102-150)", ) + parser.add_argument( + "-n", + "--names", + type=str, + nargs="+", + default=None, + help="Names of bead to center (e.g.: C1 N G2)", + ) + parser.add_argument( + "-m", + "--method", + choices=["COM", "MIC"], + type=str, + default=None, + help="Specify the centering method. Available methods:\n" + "COM: center the box around the center of mass of the given groups along a direction (axis)\n" + "MIC: center the box around a centroid of the group that assures the minimal image convention", + ) + parser.add_argument( + "--axis", + type=int, + choices=[0, 1, 2], + default=2, + required='COM' in sys.argv, + help="Direction along which to calculate the center of mass." + ) parser.add_argument( "-o", "--out", @@ -155,17 +241,55 @@ def center_trajectory( ) args = parser.parse_args() - bead_list = [] - for interval in args.beads: - bead_list += interval + if args.beads is None and args.names is None: + error_str = "Either the 'beads' or 'names' variable must be provided." 
+ raise ValueError(error_str) + + if args.out_path is None: + args.out_path = os.path.join( + os.path.abspath(os.path.dirname(args.h5md_file)), + os.path.splitext(os.path.split(args.h5md_file)[-1])[0] + + "_new" + + os.path.splitext(os.path.split(args.h5md_file)[-1])[1], + ) + if os.path.exists(args.out_path) and not args.force: + error_str = ( + f"The specified output file {args.out_path} already exists. " + f'use overwrite=True ("-f" flag) to overwrite.' + ) + raise FileExistsError(error_str) + + if args.beads is not None: + bead_list = [] + for interval in args.beads: + bead_list += interval - bead_list = np.array(sorted(bead_list)) - 1 + bead_list = np.array(sorted(bead_list)) - 1 + + if args.names is not None: + name_list = parse_name_list(args.names, args.h5md_file) + + if args.beads is not None and args.names is not None: + atom_list = np.intersect1d(bead_list, name_list) + elif args.beads is not None: + atom_list = bead_list + else: + atom_list = name_list + + if args.method == "COM": + center_trajectory_com( + args.h5md_file, + atom_list, + args.axis, + args.out_path, + ) + + if args.method == "MIC": + center_trajectory_mic( + args.h5md_file, + atom_list, + nrefbeads=args.nrefs, + out_path=args.out_path, + center_last=args.center_last, + ) - center_trajectory( - args.h5md_file, - bead_list, - nrefbeads=args.nrefs, - overwrite=args.force, - out_path=args.out_path, - center_last=args.center_last, - ) From c9c830e80eab57e1152228b50ea59a8a0c9c58a2 Mon Sep 17 00:00:00 2001 From: Manuel Carrer Date: Sat, 3 Feb 2024 12:52:48 +0100 Subject: [PATCH 05/18] We don't need to shift the COM to zero --- utils/center_group.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/utils/center_group.py b/utils/center_group.py index 60479201..2e34dd95 100644 --- a/utils/center_group.py +++ b/utils/center_group.py @@ -157,15 +157,9 @@ def center_trajectory_com(h5md_file, bead_list, axis, out_path): axis=1, ) - tpos = np.mod( + f_out["particles/all/position/value"][:] = np.mod( tpos[:, :, :], np.repeat(box_diag[:, np.newaxis, :], tpos.shape[1], axis=1) ) - - f_out["particles/all/position/value"][:] = tpos - np.repeat( - box_translate[:, np.newaxis, :], - f_out["particles/all/position/value"].shape[1], - axis=1, - ) f_out.close() From a9ca8ac8b04678f15b0860e430a89ee4c759190b Mon Sep 17 00:00:00 2001 From: Manuel Carrer Date: Sat, 3 Feb 2024 12:59:14 +0100 Subject: [PATCH 06/18] Cleanup --- utils/center_group.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/utils/center_group.py b/utils/center_group.py index 2e34dd95..a7254444 100644 --- a/utils/center_group.py +++ b/utils/center_group.py @@ -148,17 +148,18 @@ def center_trajectory_com(h5md_file, bead_list, axis, out_path): centers = get_centers_com(beads_pos, box_diag, axis) mask = np.eye(1, 3, k=axis) - box_translate = 0.5 * mask * box_diag - - translate = box_translate - centers - tpos = f_out["particles/all/position/value"] + np.repeat( + translate = 0.5 * mask * box_diag - centers + + translations = np.repeat( translate[:, np.newaxis, :], - f_out["particles/all/position/value"].shape[1], + f_in["particles/all/position/value"].shape[1], axis=1, ) + + tpos = f_out["particles/all/position/value"] + translations f_out["particles/all/position/value"][:] = np.mod( - tpos[:, :, :], np.repeat(box_diag[:, np.newaxis, :], tpos.shape[1], axis=1) + tpos, np.repeat(box_diag[:, np.newaxis, :], tpos.shape[1], axis=1) ) f_out.close() From 33c74b96dfab307dd05b6d7fae0eb4f43f57e556 Mon Sep 17 00:00:00 2001 From: 
Manuel Carrer Date: Sun, 4 Feb 2024 17:37:03 +0100 Subject: [PATCH 07/18] Fix wrong variable --- utils/center_group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/center_group.py b/utils/center_group.py index a7254444..2d8bb9d8 100644 --- a/utils/center_group.py +++ b/utils/center_group.py @@ -152,7 +152,7 @@ def center_trajectory_com(h5md_file, bead_list, axis, out_path): translations = np.repeat( translate[:, np.newaxis, :], - f_in["particles/all/position/value"].shape[1], + f_out["particles/all/position/value"].shape[1], axis=1, ) From 1debff6a31e91d23dc9768afb23591dc86801c4c Mon Sep 17 00:00:00 2001 From: Manuel Carrer Date: Sun, 4 Feb 2024 19:17:21 +0100 Subject: [PATCH 08/18] Use np.mean --- utils/center_group.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/utils/center_group.py b/utils/center_group.py index 2d8bb9d8..a8c56696 100644 --- a/utils/center_group.py +++ b/utils/center_group.py @@ -106,13 +106,13 @@ def center_trajectory_mic(h5md_file, bead_list, nrefbeads, out_path, center_last f_in.close() f_out.close() -def center_of_mass(pos, n, box): +def center_of_mass(pos, box): p_mapped = 2 * np.pi * pos / box cos_p_mapped = np.cos(p_mapped) sin_p_mapped = np.sin(p_mapped) - cos_average = np.sum(cos_p_mapped) / n - sin_average = np.sum(sin_p_mapped) / n + cos_average = np.mean(cos_p_mapped) + sin_average = np.mean(sin_p_mapped) theta = np.arctan2(-sin_average, -cos_average) + np.pi return box * theta / (2 * np.pi) @@ -120,10 +120,9 @@ def center_of_mass(pos, n, box): def get_centers_com(positions, box_size, axis): frames = positions.shape[0] - n = positions.shape[1] centers = np.zeros((frames, 3)) for frame in range(frames): - centers[frame, axis] = center_of_mass(positions[frame, :, axis], n, box_size[frame, axis]) + centers[frame, axis] = center_of_mass(positions[frame, :, axis], box_size[frame, axis]) return centers From 78a18a2267e2b9ac369e99c7589a5b590edf849f Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Wed, 28 Feb 2024 16:33:54 +0100 Subject: [PATCH 09/18] Add option to font sizes in aggregates.py --- utils/aggregates.py | 55 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/utils/aggregates.py b/utils/aggregates.py index 596aeecc..0c7964cb 100644 --- a/utils/aggregates.py +++ b/utils/aggregates.py @@ -1,3 +1,5 @@ +# PYTHON_ARGCOMPLETE_OK + import MDAnalysis as mda from MDAnalysis.analysis import distances import numpy as np @@ -6,7 +8,7 @@ from matplotlib.ticker import MaxNLocator import os import sys -import argparse +import argcomplete, argparse from tqdm import tqdm import warnings import dask @@ -134,6 +136,8 @@ def aggregates_clustering( plot_dendrograms, traj_in_memory, save_solvent, + font_size, + caption_font_size, summary_fig_size=(12, 8), ): u = mda.Universe(grofile, h5mdfile, in_memory=traj_in_memory) @@ -236,30 +240,42 @@ def aggregates_clustering( # plot results _, axs = plt.subplots(2, 2, figsize=summary_fig_size) + plt.rcParams.update({"font.size": font_size}) + + axs[0, 0].text(-0.2, 1.05, "(a)", transform=axs[0, 0].transAxes, fontsize=caption_font_size, va='top', ha='right') + axs[0, 1].text(-0.2, 1.05, "(b)", transform=axs[0, 1].transAxes, fontsize=caption_font_size, va='top', ha='right') + axs[1, 0].text(-0.2, 1.05, "(c)", transform=axs[1, 0].transAxes, fontsize=caption_font_size, va='top', ha='right') + axs[1, 1].text(-0.2, 1.05, "(d)", transform=axs[1, 1].transAxes, fontsize=caption_font_size, va='top', ha='right') axs[0, 
0].plot(frames, n_clusters) axs[0, 0].axhline(avg_n_aggs, linestyle="--") - axs[0, 0].set_ylabel("Num. of aggregates") - axs[0, 0].set_xlabel("Frame") + axs[0, 0].set_ylabel("Num. of aggregates", fontsize=font_size) + axs[0, 0].set_xlabel("Frame", fontsize=font_size) + # axs[0, 0].set_ylim([4, 6]) axs[0, 0].yaxis.set_major_locator(MaxNLocator(integer=True)) xticklabels = [f"{sizes[i]}" for i in range(len(sizes))] axs[0, 1].bar(xticklabels, freq, width=0.8) - axs[0, 1].set_ylabel("Avg. num. per snapshot") - axs[0, 1].set_xlabel("Aggregate size") - axs[0, 1].tick_params("x", labelrotation=60) + axs[0, 1].set_ylabel("Avg. num. per snapshot", fontsize=font_size) + axs[0, 1].set_xlabel("Aggregate size", fontsize=font_size) + axs[0, 1].tick_params("x", labelrotation=60, labelsize=font_size) xticklabels = [f"{k}" for k in prob_by_size.keys()] axs[1, 0].bar(xticklabels, prob_by_size.values(), width=0.8) - axs[1, 0].set_ylabel("Prob.") - axs[1, 0].set_xlabel("Aggregate size") - axs[1, 0].tick_params("x", labelrotation=60) + axs[1, 0].set_ylabel("Prob.", fontsize=font_size) + axs[1, 0].set_xlabel("Aggregate size", fontsize=font_size) + axs[1, 0].tick_params("x", labelrotation=60, labelsize=font_size) xticklabels = [f"{k}" for k in prob_mol_size.keys()] axs[1, 1].bar(xticklabels, prob_mol_size.values(), width=0.8) - axs[1, 1].set_ylabel("Prob. molecule in agg.") - axs[1, 1].set_xlabel("Aggregate size") - axs[1, 1].tick_params("x", labelrotation=60) + axs[1, 1].set_ylabel("Prob. molecule in agg.", fontsize=font_size) + axs[1, 1].set_xlabel("Aggregate size", fontsize=font_size) + axs[1, 1].tick_params("x", labelrotation=60, labelsize=font_size) + + axs[0, 0].tick_params(axis='both', which='major', labelsize=font_size) + axs[0, 1].tick_params(axis='both', which='major', labelsize=font_size) + axs[1, 0].tick_params(axis='both', which='major', labelsize=font_size) + axs[1, 1].tick_params(axis='both', which='major', labelsize=font_size) plt.tight_layout() plt.savefig("summary_clustering.pdf", bbox_inches="tight") @@ -351,6 +367,18 @@ def aggregates_clustering( default=(8, 6), help="two integers to define the size of the summary figure (default = 8 6)", ) + parser.add_argument( + "--font-size", + type=int, + default=12, + help="font size for the summary figure (default = 12)", + ) + parser.add_argument( + "--caption-font-size", + type=int, + default=13, + help="font size for the captions of summary figure (default = 13)", + ) parser.add_argument( "--traj-in-memory", action="store_true", @@ -358,6 +386,7 @@ def aggregates_clustering( help="load the whole trajectory in memory with MDAanalysis", ) + argcomplete.autocomplete(parser) args = parser.parse_args() if os.path.splitext(args.h5md_file)[1].lower() == ".h5": @@ -404,5 +433,7 @@ def aggregates_clustering( args.plot_dendrograms, args.traj_in_memory, args.save_solvent, + args.font_size, + args.caption_font_size, tuple(args.summary_fig_size), ) From c406ed1d3f783c92f08fca5ffdb399f39a1f2560 Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Fri, 8 Mar 2024 16:51:39 +0100 Subject: [PATCH 10/18] Print clusters found in each frame. 
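To make the new bookkeeping concrete, a minimal sketch with two hypothetical
frames of cluster labels (one label per molecule):

    import numpy as np

    clusters = [np.array([0, 0, 1, 1, 1]), np.array([0, 0, 0, 1, 2])]

    n_clusters, clusters_per_frame, all_sizes = [], [], []
    for c in clusters:
        unique_clusts, clust_counts = np.unique(c, return_counts=True)
        n_clusters.append(len(unique_clusts))             # 2, then 3
        clusters_per_frame.append(np.sort(clust_counts))  # [2 3], then [1 1 3]
        all_sizes += clust_counts.tolist()

    sizes, freq = np.unique(all_sizes, return_counts=True)
    total_clust_by_size = dict(zip(sizes, freq))          # {1: 2, 2: 1, 3: 2}
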
--- utils/aggregates.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/utils/aggregates.py b/utils/aggregates.py index 0c7964cb..c7310685 100644 --- a/utils/aggregates.py +++ b/utils/aggregates.py @@ -186,21 +186,19 @@ def aggregates_clustering( n_clusters = [] all_sizes = [] - total_clust_by_size = {} + clusters_per_frame = [] for c in clusters[0]: # get the number of clusters and sizes unique_clusts, clust_counts = np.unique(c, return_counts=True) - for size in clust_counts: - if size not in total_clust_by_size: - total_clust_by_size[size] = 1 - else: - total_clust_by_size[size] += 1 + n_clusters.append(len(unique_clusts)) + clusters_per_frame.append(np.sort(clust_counts)) all_sizes += clust_counts.tolist() # based on cluster sizes get occurence of each size sizes, freq = np.unique(all_sizes, return_counts=True) + total_clust_by_size = dict(zip(sizes, freq)) freq = freq / len(u.trajectory[skip:end:stride]) # overall average number of aggregates @@ -238,6 +236,12 @@ def aggregates_clustering( for s, p in prob_mol_size.items(): of.write(f"{s}\t{p}\n") + of.write("\nClusters sizes in each analyzed frame:\n") + for c in clusters_per_frame: + for s in c: + of.write(f"{s}\t") + of.write("\n") + # plot results _, axs = plt.subplots(2, 2, figsize=summary_fig_size) plt.rcParams.update({"font.size": font_size}) From 7b146e40cf89722618ef9ce453681f46b5ea820b Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Fri, 5 Apr 2024 16:06:54 +0200 Subject: [PATCH 11/18] Add option to compute CDF. --- utils/compute_rdf_aggregates.py | 292 ++++++++++++++++++++++++++++---- 1 file changed, 255 insertions(+), 37 deletions(-) diff --git a/utils/compute_rdf_aggregates.py b/utils/compute_rdf_aggregates.py index b07f66b6..0ff7ec41 100644 --- a/utils/compute_rdf_aggregates.py +++ b/utils/compute_rdf_aggregates.py @@ -1,8 +1,9 @@ import MDAnalysis as mda -from MDAnalysis.lib import distances +from MDAnalysis.lib import distances, mdamath, transformations from scipy.spatial.distance import squareform import numpy as np import matplotlib.pyplot as plt +from matplotlib.image import NonUniformImage import os import sys from glob import glob @@ -55,18 +56,29 @@ def compute_rdfs( name_selections, center, nbins, + nbinsl, rmax, + lmax, + natoms_normalization, masses, skip_rdfs, compute_i_rg, compute_pddf, + compute_cdfs, save_centered, save_agg_only, + names_prefix, + start_colorbar, + width_colorbar, + fig_size=(10, 8), + fig_size_cdf=(12,12), ): # initialize rdfs = {} + cdfs = {} for i in range(len(selections)): rdfs[i] = np.zeros(nbins) + cdfs[i] = np.zeros((nbinsl, nbins)) if compute_i_rg: I1 = [] I2 = [] @@ -78,10 +90,17 @@ def compute_rdfs( # read the form factors / scattering lengths form_factors = parse_parametersfile(compute_pddf) + if not names_prefix: + names_prefix = "" + nsnaps = 0 for snapshot in tqdm(glob(os.path.join(pdbdir, "*.pdb"))): u = mda.Universe(snapshot) + if not natoms_normalization: + natoms_normalization = len(u.atoms) + + # based on topology and residues determine size of aggregates resids = {} for residue in u.residues: @@ -127,7 +146,7 @@ def compute_rdfs( # since some methods are not PBC aware, we center the # aggregate in the box so it does not split in the PBCs - if save_agg_only or save_centered or compute_i_rg: + if save_agg_only or save_centered or compute_i_rg or compute_cdfs: box_center = box_vectors[:3] / 2.0 u.atoms.translate(box_center - cog) u.atoms.wrap(compound="atoms") @@ -155,8 +174,8 @@ def compute_rdfs( os.path.join(dirname, 
f"centered_{os.path.basename(snapshot)}") ) - # compute the principal moment of inertia and Rg - if compute_i_rg: + # set the masses + if masses: # set the masses agg_sel = u.select_atoms(f"resid {resid}") @@ -172,6 +191,10 @@ def compute_rdfs( agg_sel.masses = mass_array total_mass = np.sum(mass_array) + # compute the principal moment of inertia and Rg + if compute_i_rg: + agg_sel = u.select_atoms(f"resid {resid}") + # get moment of inertia and principal axis I = agg_sel.moment_of_inertia() UT = agg_sel.principal_axes() # this is already transposed @@ -212,27 +235,66 @@ def compute_rdfs( # Compute the histogram pddf_i, pddf_edges = np.histogram(cond_distmat, bins=nbins, range=(0, rmax), weights=weights, density=False) - # deltaR = rmax / nbins - # pddf_i = np.zeros((nbins,), dtype=np.float64) - # pddf_dists = np.linspace(deltaR/2.0, rmax - deltaR/2.0, nbins) - # for i in range(n): - # for j in range(i + 1, n): - # if sqdistmat[i][j] <= rmax: - # bin_distance = np.floor(sqdistmat[i][j]/deltaR) - - # pddf_i[int(bin_distance)] += form_factors[agg_sel[i].residue.resname][agg_sel[i].name][0] * form_factors[agg_sel[j].residue.resname][agg_sel[j].name][0] - - - # pddf_i, pddf_edges = np.histogram( - # cond_distmat, - # bins=nbins, - # range=(0, rmax), - # density=False, + pddf += pddf_i + + # compute the cylindrical distribution function + if compute_cdfs: + # Based on https://github.com/RichardMandle/cylindr + # Cite: 10.1371/journal.pone.0279679 + + # first align the principal axis of the aggregate with the z-axis + agg_sel = u.select_atoms(f"resid {resid}") + + # get principal axis + pa = agg_sel.principal_axes()[2] + + # get the angle between the principal axis and the z-axis and rotate universe + angle = np.degrees(mdamath.angle(pa, [0,0,1])) + ax = transformations.rotaxis(pa, [0,0,1]) + u.atoms.rotateby(angle, ax, point=cog) + # dirname = f"./rotated_cdf_{agg_size}" + + # if not os.path.exists(dirname): + # os.mkdir(dirname) + + # u.atoms.write( + # os.path.join(dirname, f"rotated_{os.path.basename(snapshot)}") # ) - pddf += pddf_i + # create selections for which CDFs will be computed + for i, sel_string in enumerate(selections): + if sel_string.strip().lower() in ["name w", "type w", "name na", "type na", "name cl", "type cl"]: + at_sel = u.select_atoms(sel_string) + else: + at_sel = u.select_atoms(f"resid {resid} and " + sel_string) + + + # Compute the distances between the target atoms and the center + distances_matrix = at_sel.positions - cog + + # create array with z and r distances + zdist = distances_matrix[:, 2] + rdist = np.sqrt(distances_matrix[:, 0] ** 2 + distances_matrix[:, 1] ** 2) + + # Exclude positions where z is greater than lmax or smaller than -lmax + valid_indices = np.where((zdist >= -lmax) & (zdist <= lmax)) + zdist = zdist[valid_indices] + rdist = rdist[valid_indices] + + # Compute the CDF + cdf, yedges, xedges = np.histogram2d( + zdist, rdist, bins=[nbinsl, nbins], range=[[-lmax, lmax], [0, rmax]], density=False + ) + cdfs[i] += cdf * np.prod(box_vectors[:3]) / natoms_normalization + + # Compute the RDF using numpy histogram + rdf, bin_edges = np.histogram( + rdist, bins=nbins, range=(0, rmax), density=False + ) + rdfs[i] += rdf * np.prod(box_vectors[:3]) / natoms_normalization - if not skip_rdfs: + + elif not skip_rdfs: # create selections for which RDFs will be computed for i, sel_string in enumerate(selections): if sel_string.strip().lower() in ["name w", "type w", "name na", "type na", "name cl", "type cl"]: @@ -253,19 +315,108 @@ def compute_rdfs( 
flattened_distances, bins=nbins, range=(0, rmax), density=False ) - rdfs[i] += rdf * np.prod(box_vectors[:3]) / len(u.atoms) + rdfs[i] += rdf * np.prod(box_vectors[:3]) / natoms_normalization + - plt.rcParams["figure.figsize"] = (10, 8) + plt.rcParams["figure.figsize"] = fig_size plt.rcParams["font.size"] = 22 + if compute_cdfs: + # compute the volume of each shell + # shell_volumes = np.zeros((np.size(yedges) - 1, np.size(xedges) - 1)) + # for n in range(0, np.size(yedges) -1): + # for m in range(0, np.size(xedges) - 1): + # shell_volumes[n, m] = ((2 * np.pi * (xedges[m + 1]) ** 2) - (2 * np.pi * (xedges[m - 1]) ** 2)) * np.abs(yedges[2] - yedges[1]) + shell_volumes = np.pi * (xedges[1:] ** 2 - xedges[:-1] ** 2) * np.abs(yedges[1] - yedges[0]) + xedges /= 10.0 + yedges /= 10.0 + # prepare to plot + xcenters = (xedges[:-1] + xedges[1:]) / 2.0 + ycenters = (yedges[:-1] + yedges[1:]) / 2.0 + np.savetxt(names_prefix + f"{agg_size}_binsR.txt", xcenters) + np.savetxt(names_prefix + f"{agg_size}_binsL.txt", ycenters) + + # normalize CDFs and find the lower and largest values + minval = np.inf + maxval = -np.inf + for i in range(len(selections)): + cdfs[i] = cdfs[i] / (shell_volumes * nsnaps) + if minval > np.min(cdfs[i]): + minval = np.min(cdfs[i]) + if maxval < np.max(cdfs[i]): + maxval = np.max(cdfs[i]) + + # normalize the CDF by dividing by the shell volume and number of snapshots and plot + fig, ax = plt.subplots(nrows=2, ncols=int(np.ceil(len(selections)/2)), figsize=fig_size_cdf) + fig.tight_layout(w_pad=0.4, h_pad=1.8) + for a in ax.flatten(): + a.set_aspect(5/10) + if int(np.ceil(len(selections)/2)) == 1: + for i, sel in enumerate(selections): + ax[i].set_title(name_selections[i]) + ax[i].set_xlim(xedges[[0, -1]]) + ax[i].set_ylim(yedges[[0, -1]]) + ax[i].set_xticks(np.linspace(xedges[0], xedges[-1], 3)) + ax[i].set_yticks(np.linspace(yedges[0], yedges[-1], 7)) + np.savetxt(names_prefix + f"CDFs_{agg_size}_" + sel.replace(" ", "_") + ".txt", cdfs[i]) + im = ax[i].imshow(cdfs[i], extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], origin='lower', interpolation='bilinear', vmin=minval, vmax=maxval, cmap=plt.cm.RdBu) + ax[i].grid() + ax[i].set_xlabel(r"r (nm)") + ax[i].set_ylabel(r"h (nm)") + else: + # make the last plot blank if the number of selections is odd + if len(selections) % 2 != 0: + ax[-1][-1].axis('off') + for i, sel in enumerate(selections): + idx1, idx2 = i % 2, int(i / 2) + ax[idx1][idx2].set_title(name_selections[i]) + ax[idx1][idx2].set_xlim(xedges[[0, -1]]) + ax[idx1][idx2].set_ylim(yedges[[0, -1]]) + ax[idx1][idx2].set_xticks(np.linspace(xedges[0], xedges[-1], 3)) + ax[idx1][idx2].set_yticks(np.linspace(yedges[0], yedges[-1], 7)) + np.savetxt(names_prefix + f"CDFs_{agg_size}_" + sel.replace(" ", "_") + ".txt", cdfs[i]) + im = ax[idx1][idx2].imshow(cdfs[i], extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], origin='lower', interpolation='bilinear', vmin=minval, vmax=maxval, cmap=plt.cm.RdBu) + ax[idx1][idx2].grid() + ax[idx1][idx2].set_xlabel(r"r (nm)") + ax[idx1][idx2].set_ylabel(r"h (nm)") + # Add color bar + cax = fig.add_axes([start_colorbar, 0.15, width_colorbar, 0.7]) + cbar = fig.colorbar(im, cax=cax) + cbar.set_label('CDF') + fig.subplots_adjust(hspace=0.35, right=start_colorbar) + plt.savefig(names_prefix + f"CDFs_{agg_size}.pdf") + plt.show() + plt.close('all') + + # radial + # shell_volumes = np.pi * (bin_edges[1:] ** 2 - bin_edges[:-1] ** 2) * box_vectors[2] + shell_volumes = np.pi * (bin_edges[1:] ** 2 - bin_edges[:-1] ** 2) * 2.0 * lmax + # Prepare to 
plot + mid_bin = (bin_edges[:-1] + bin_edges[1:]) / 2.0 + np.savetxt(names_prefix + f"gperp_{agg_size}_bins.txt", mid_bin) + + # Normalize the RDF by dividing by the shell volume and number of snapshots and plot + for i, sel in enumerate(selections): + rdfs[i] = rdfs[i] / (shell_volumes * nsnaps) + + plt.plot(mid_bin / 10.0, rdfs[i], linewidth=3, label=name_selections[i]) + + np.savetxt(names_prefix + f"gperp_{agg_size}_" + sel.replace(" ", "_") + ".txt", rdfs[i]) + + plt.legend(frameon=False) + plt.xlabel(r"r (nm)") + plt.ylabel(r"g$_\perp$(r)") + plt.savefig(names_prefix + f"radial_RDFs_{agg_size}.pdf", bbox_inches="tight") + plt.show() + # check if rdfs are skipped - if not skip_rdfs: + elif not skip_rdfs: # Compute the average volume of each shell shell_volumes = (4.0 / 3.0) * np.pi * (bin_edges[1:] ** 3 - bin_edges[:-1] ** 3) # Prepare to plot mid_bin = (bin_edges[:-1] + bin_edges[1:]) / 2.0 - np.savetxt(f"{agg_size}_bins.txt", mid_bin) + np.savetxt(names_prefix + f"{agg_size}_bins.txt", mid_bin) # Normalize the RDF by dividing by the shell volume and number of snapshots and plot for i, sel in enumerate(selections): @@ -273,12 +424,12 @@ def compute_rdfs( plt.plot(mid_bin / 10.0, rdfs[i], linewidth=3, label=name_selections[i]) - np.savetxt(f"{agg_size}_" + sel.replace(" ", "_") + ".txt", rdfs[i]) + np.savetxt(names_prefix + f"{agg_size}_" + sel.replace(" ", "_") + ".txt", rdfs[i]) plt.legend(frameon=False) plt.xlabel(r"r (nm)") plt.ylabel(r"g(r)") - plt.savefig(f"RDFs_{agg_size}.pdf", bbox_inches="tight") + plt.savefig(names_prefix + f"RDFs_{agg_size}.pdf", bbox_inches="tight") plt.show() # now save information regarding the Is and Rg @@ -309,7 +460,7 @@ def compute_rdfs( plt.xlabel(r"Config") plt.ylabel(r"MOI (Da $\AA^2$)") - plt.savefig(f"Is_{agg_size}.pdf", bbox_inches="tight") + plt.savefig(names_prefix + f"Is_{agg_size}.pdf", bbox_inches="tight") plt.show() # clear plot and plot Rg @@ -319,7 +470,7 @@ def compute_rdfs( plt.xlabel(r"Config") plt.ylabel(r"R$_g$ ($\AA$)") - plt.savefig(f"Rg_{agg_size}.pdf", bbox_inches="tight") + plt.savefig(names_prefix + f"Rg_{agg_size}.pdf", bbox_inches="tight") plt.show() # Now print average values and alpha @@ -339,14 +490,14 @@ def compute_rdfs( # pddf /= np.max(pddf) - np.savetxt(f"{agg_size}_" + "PDDF.txt", pddf) - np.savetxt(f"{agg_size}_" + "PDDF_bins.txt", mid_bin_pddf) + np.savetxt(names_prefix + f"{agg_size}_" + "PDDF.txt", pddf) + np.savetxt(names_prefix + f"{agg_size}_" + "PDDF_bins.txt", mid_bin_pddf) plt.plot(mid_bin_pddf / 10.0, pddf, linewidth=3) plt.xlabel(r"r (nm)") plt.ylabel(r"PDDF") - plt.savefig(f"PDDF_{agg_size}.pdf", bbox_inches="tight") + plt.savefig(names_prefix + f"PDDF_{agg_size}.pdf", bbox_inches="tight") plt.show() @@ -373,14 +524,26 @@ def compute_rdfs( parser.add_argument( "--nbins", type=int, - default=100, - help="number of bins used in the histogram (default = 100)", + default=50, + help="number of bins used in the histogram (default = 50)", + ) + parser.add_argument( + "--nbinsl", + type=int, + default=300, + help="number of bins used in the histogram for CDF in z direction (default = 300)", ) parser.add_argument( "--rmax", type=float, default=10.0, - help="maximum distance of RDF (defualt = 10.0 A)", + help="maximum distance of RDF (default = 10.0 A)", + ) + parser.add_argument( + "--lmax", + type=float, + default=30.0, + help="maximum distance for CDF z direction (default = 30.0 A)", ) parser.add_argument( "--center", @@ -412,6 +575,12 @@ def compute_rdfs( default=False, help="skip RDF computations", ) + 
parser.add_argument( + "--compute-cdfs", + action="store_true", + default=False, + help="instead of computing RDFs, compute CDFs (requires --masses)", + ) parser.add_argument( "--principal-moments-rg", action="store_true", @@ -422,13 +591,53 @@ def compute_rdfs( "--compute-pddf", type=str, default=None, - help="compute the pair distance distribution function (PDDF) - needs a form factor .toml file", + help="compute the pair distance distribution function (PDDF) - needs parameters file just like for PLUMED", + ) + parser.add_argument( + "--natoms-normalization", + type=int, + default=None, + help="number of atoms to normalize the RDFs/CDFs", + ) + parser.add_argument( + "--names-prefix", + type=str, + default=None, + help="prefix for the names of the saved files", + ) + parser.add_argument( + "--fig-size", + type=int, + nargs=2, + default=(10, 8), + help="two integers to define the size of the RDF figure (default = 10 8)", + ) + parser.add_argument( + "--fig-size-cdf", + type=int, + nargs=2, + default=(12, 12), + help="two integers to define the size of the CDF figure (default = 12 12)", + ) + parser.add_argument( + "--start-colorbar", + type=float, + default=0.85, + help="value for which after that the colorbar will start (default = 0.85) in percentage of width", + ) + parser.add_argument( + "--width-colorbar", + type=float, + default=0.02, + help="width of the colorbar (default = 0.02)", ) args = parser.parse_args() if args.principal_moments_rg and not args.masses: raise AssertionError("--principal-moments-rg requires passing the masses") + if args.compute_cdfs and not args.masses: + raise AssertionError("--compute-cdfs requires passing the masses") if args.name_selections and (len(args.selections) != len(args.name_selections)): raise AssertionError("the number of selections must be equal to the number of names") @@ -449,11 +658,20 @@ def compute_rdfs( args.name_selections, args.center, args.nbins, + args.nbinsl, args.rmax, + args.lmax, + args.natoms_normalization, parse_masses(args.masses), args.do_not_compute_rdfs, args.principal_moments_rg, args.compute_pddf, + args.compute_cdfs, args.save_centered_aggregate, args.save_aggregate_only, + args.names_prefix, + args.start_colorbar, + args.width_colorbar, + tuple(args.fig_size), + tuple(args.fig_size_cdf), ) From 3eb2f29da35e231f70b4026bde7dfc0992d96c6c Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Tue, 21 May 2024 11:26:41 +0200 Subject: [PATCH 12/18] Adds separation in spherical, prolate and oblate aggregates. 
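The shape detection added below, pulled out as a standalone sketch (the
helper name and example moments are illustrative, not part of the script;
the 0.12 cutoff is the value hardcoded in this patch):

    import numpy as np

    def classify_aggregate(I1, I2, I3, threshold=0.12):
        # "eccentricity" as defined in this patch: 1 - Imin / Imean
        e = 1.0 - np.min([I1, I2, I3]) / np.mean([I1, I2, I3])
        if e < threshold:
            return "sphere", e
        # I3 is the smallest moment (longest axis): if the two largest
        # moments differ more than the two smallest, the aggregate is
        # disk-like (oblate), otherwise rod-like (prolate)
        if np.abs(I1 - I2) > np.abs(I2 - I3):
            return "oblate", e
        return "prolate", e

    print(classify_aggregate(10.0, 9.8, 4.0))  # ('prolate', ~0.496)
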
--- utils/compute_rdf_aggregates.py | 242 +++++++++++++++++++++----------- 1 file changed, 161 insertions(+), 81 deletions(-) diff --git a/utils/compute_rdf_aggregates.py b/utils/compute_rdf_aggregates.py index 0ff7ec41..45d4424e 100644 --- a/utils/compute_rdf_aggregates.py +++ b/utils/compute_rdf_aggregates.py @@ -76,9 +76,21 @@ def compute_rdfs( # initialize rdfs = {} cdfs = {} - for i in range(len(selections)): - rdfs[i] = np.zeros(nbins) - cdfs[i] = np.zeros((nbinsl, nbins)) + if compute_cdfs: + type_map = {"sphere": 0, "oblate": 1, "prolate": 2} + rev_type_map = {v: k for k, v in type_map.items()} + agg_types = [] + eccentricities = [] + + for k, v in type_map.items(): + cdfs[v] = {} + rdfs[v] = {} + for i in range(len(selections)): + cdfs[v][i] = np.zeros((nbinsl, nbins)) + rdfs[v][i] = np.zeros(nbins) + else: + for i in range(len(selections)): + rdfs[i] = np.zeros(nbins) if compute_i_rg: I1 = [] I2 = [] @@ -94,13 +106,13 @@ def compute_rdfs( names_prefix = "" nsnaps = 0 + snapshots = [] for snapshot in tqdm(glob(os.path.join(pdbdir, "*.pdb"))): u = mda.Universe(snapshot) if not natoms_normalization: natoms_normalization = len(u.atoms) - # based on topology and residues determine size of aggregates resids = {} for residue in u.residues: @@ -122,6 +134,8 @@ def compute_rdfs( if len(agg_resids) == 0: continue + snapshots.append(os.path.basename(snapshot)) + # for each aggregate of given size, accumulate the RDFs for resid in agg_resids: nsnaps += 1 @@ -245,10 +259,47 @@ def compute_rdfs( # first align the principal axis of the aggregate with the z-axis agg_sel = u.select_atoms(f"resid {resid}") - # get principal axis - pa = agg_sel.principal_axes()[2] + # get moment of inertia and principal axis + I = agg_sel.moment_of_inertia() + UT = agg_sel.principal_axes() # this is already transposed + + # diagonalizes I + Lambda = UT.dot(I.dot(UT.T)) + + I1 = Lambda[0][0] + I2 = Lambda[1][1] + I3 = Lambda[2][2] + + # TODO: based on the three principal moments of inertia we should find + # if the aggregate is rod-like, disk-like or sphere-like + # and then compute the CDFs accordingly (separating the cases) + # this can be done by checking which axis are similar in length and calling them a = b != c + # if c/a is close to 1, then it is a sphere + # if c/a is greater than 1, then it is prolate + # if c/a is smaller than 1, then it is oblate + # then we can also output the amount of each type of aggregate + type_agg = None + # compute eccentricity + e = 1.0 - np.min([I1, I2, I3]) / np.mean([I1, I2, I3]) + eccentricities.append(e) + if e < 0.12: + type_agg = type_map["sphere"] + else: + # I3 is the lowest value (biggest axis) + if np.abs(I1 - I2) > np.abs(I2 - I3): + type_agg = type_map["oblate"] + else: + type_agg = type_map["prolate"] + agg_types.append(type_agg) + + # gets symmetry axis + if type_agg == type_map["oblate"]: + pa = UT[0] + else: + pa = UT[2] # get the angle between the principal axis and the z-axis and rotate universe + # this does not take into consideration prolate or oblate micelles angle = np.degrees(mdamath.angle(pa, [0,0,1])) ax = transformations.rotaxis(pa, [0,0,1]) u.atoms.rotateby(angle, ax, point=cog) @@ -268,7 +319,6 @@ def compute_rdfs( else: at_sel = u.select_atoms(f"resid {resid} and " + sel_string) - # Compute the distances between the target atoms and the center distances_matrix = at_sel.positions - cog @@ -285,13 +335,13 @@ def compute_rdfs( cdf, yedges, xedges = np.histogram2d( zdist, rdist, bins=[nbinsl, nbins], range=[[-lmax, lmax], [0, rmax]], density=False ) - 
cdfs[i] += cdf * np.prod(box_vectors[:3]) / natoms_normalization + cdfs[type_agg][i] += cdf * np.prod(box_vectors[:3]) / natoms_normalization # Compute the RDF using numpy histogram rdf, bin_edges = np.histogram( rdist, bins=nbins, range=(0, rmax), density=False ) - rdfs[i] += rdf * np.prod(box_vectors[:3]) / natoms_normalization + rdfs[type_agg][i] += rdf * np.prod(box_vectors[:3]) / natoms_normalization elif not skip_rdfs: @@ -322,12 +372,17 @@ def compute_rdfs( plt.rcParams["font.size"] = 22 if compute_cdfs: - # compute the volume of each shell - # shell_volumes = np.zeros((np.size(yedges) - 1, np.size(xedges) - 1)) - # for n in range(0, np.size(yedges) -1): - # for m in range(0, np.size(xedges) - 1): - # shell_volumes[n, m] = ((2 * np.pi * (xedges[m + 1]) ** 2) - (2 * np.pi * (xedges[m - 1]) ** 2)) * np.abs(yedges[2] - yedges[1]) - shell_volumes = np.pi * (xedges[1:] ** 2 - xedges[:-1] ** 2) * np.abs(yedges[1] - yedges[0]) + # plot evolution of the aggregate types + fig, ax = plt.subplots() + ax.plot(np.arange(len(agg_types)) , agg_types) + ax.set_yticks([0, 1, 2]) + ax.set_yticklabels(["sphere", "oblate", "prolate"]) + plt.savefig(names_prefix + f"agg_type_evolution_{agg_size}.pdf") + plt.show() + plt.close('all') + + # CDF + shell_volumes_cdf = np.pi * (xedges[1:] ** 2 - xedges[:-1] ** 2) * np.abs(yedges[1] - yedges[0]) xedges /= 10.0 yedges /= 10.0 # prepare to plot @@ -336,78 +391,103 @@ def compute_rdfs( np.savetxt(names_prefix + f"{agg_size}_binsR.txt", xcenters) np.savetxt(names_prefix + f"{agg_size}_binsL.txt", ycenters) - # normalize CDFs and find the lower and largest values - minval = np.inf - maxval = -np.inf - for i in range(len(selections)): - cdfs[i] = cdfs[i] / (shell_volumes * nsnaps) - if minval > np.min(cdfs[i]): - minval = np.min(cdfs[i]) - if maxval < np.max(cdfs[i]): - maxval = np.max(cdfs[i]) - - # normalize the CDF by dividing by the shell volume and number of snapshots and plot - fig, ax = plt.subplots(nrows=2, ncols=int(np.ceil(len(selections)/2)), figsize=fig_size_cdf) - fig.tight_layout(w_pad=0.4, h_pad=1.8) - for a in ax.flatten(): - a.set_aspect(5/10) - if int(np.ceil(len(selections)/2)) == 1: - for i, sel in enumerate(selections): - ax[i].set_title(name_selections[i]) - ax[i].set_xlim(xedges[[0, -1]]) - ax[i].set_ylim(yedges[[0, -1]]) - ax[i].set_xticks(np.linspace(xedges[0], xedges[-1], 3)) - ax[i].set_yticks(np.linspace(yedges[0], yedges[-1], 7)) - np.savetxt(names_prefix + f"CDFs_{agg_size}_" + sel.replace(" ", "_") + ".txt", cdfs[i]) - im = ax[i].imshow(cdfs[i], extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], origin='lower', interpolation='bilinear', vmin=minval, vmax=maxval, cmap=plt.cm.RdBu) - ax[i].grid() - ax[i].set_xlabel(r"r (nm)") - ax[i].set_ylabel(r"h (nm)") - else: - # make the last plot blank if the number of selections is odd - if len(selections) % 2 != 0: - ax[-1][-1].axis('off') - for i, sel in enumerate(selections): - idx1, idx2 = i % 2, int(i / 2) - ax[idx1][idx2].set_title(name_selections[i]) - ax[idx1][idx2].set_xlim(xedges[[0, -1]]) - ax[idx1][idx2].set_ylim(yedges[[0, -1]]) - ax[idx1][idx2].set_xticks(np.linspace(xedges[0], xedges[-1], 3)) - ax[idx1][idx2].set_yticks(np.linspace(yedges[0], yedges[-1], 7)) - np.savetxt(names_prefix + f"CDFs_{agg_size}_" + sel.replace(" ", "_") + ".txt", cdfs[i]) - im = ax[idx1][idx2].imshow(cdfs[i], extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], origin='lower', interpolation='bilinear', vmin=minval, vmax=maxval, cmap=plt.cm.RdBu) - ax[idx1][idx2].grid() - ax[idx1][idx2].set_xlabel(r"r 
(nm)") - ax[idx1][idx2].set_ylabel(r"h (nm)") - # Add color bar - cax = fig.add_axes([start_colorbar, 0.15, width_colorbar, 0.7]) - cbar = fig.colorbar(im, cax=cax) - cbar.set_label('CDF') - fig.subplots_adjust(hspace=0.35, right=start_colorbar) - plt.savefig(names_prefix + f"CDFs_{agg_size}.pdf") - plt.show() - plt.close('all') - - # radial - # shell_volumes = np.pi * (bin_edges[1:] ** 2 - bin_edges[:-1] ** 2) * box_vectors[2] - shell_volumes = np.pi * (bin_edges[1:] ** 2 - bin_edges[:-1] ** 2) * 2.0 * lmax + # RDF + shell_volumes_rdf = np.pi * (bin_edges[1:] ** 2 - bin_edges[:-1] ** 2) * 2.0 * lmax # Prepare to plot mid_bin = (bin_edges[:-1] + bin_edges[1:]) / 2.0 np.savetxt(names_prefix + f"gperp_{agg_size}_bins.txt", mid_bin) - # Normalize the RDF by dividing by the shell volume and number of snapshots and plot - for i, sel in enumerate(selections): - rdfs[i] = rdfs[i] / (shell_volumes * nsnaps) + # count agg by type and write to file + count_agg_types = {} + for i in type_map.values(): + count_agg_types[i] = np.count_nonzero(np.array(agg_types) == i) + + # write results of agg type detection to file + with open("summary_compute_rdf_aggregates.txt", "a") as f: + f.write("\nNumber of aggregates classified by type:\n") + for k, v in count_agg_types.items(): + f.write(f"{rev_type_map[k]}: {v} => {100.0 * v/nsnaps:.4f} %. = {np.mean(np.array(eccentricities)[np.array(agg_types) == k]):.4f}\n") + + f.write("\nSnapshot number by aggregate type:\n") + snap_map = np.array(snapshots) + for k, v in type_map.items(): + f.write(f"{k}: ") + for snap in snap_map[np.array(agg_types) == v]: + f.write(f"{snap}, ") + f.write("\n") + + # for each type of aggregate, normalize the CDFs/RDFs and plot + for agg_type in type_map.values(): + # skip if there are no CDFs to plot + if count_agg_types[agg_type] == 0: + continue + + # normalize CDFs and find the lower and largest values + minval = np.inf + maxval = -np.inf + for i in range(len(selections)): + cdfs[agg_type][i] = cdfs[agg_type][i] / (shell_volumes_cdf * count_agg_types[agg_type]) + if minval > np.min(cdfs[agg_type][i]): + minval = np.min(cdfs[agg_type][i]) + if maxval < np.max(cdfs[agg_type][i]): + maxval = np.max(cdfs[agg_type][i]) + + # normalize the CDF by dividing by the shell volume and number of snapshots and plot + fig, ax = plt.subplots(nrows=2, ncols=int(np.ceil(len(selections)/2)), figsize=fig_size_cdf) + fig.tight_layout(w_pad=0.4, h_pad=1.8) + for a in ax.flatten(): + a.set_aspect(5/10) + if int(np.ceil(len(selections)/2)) == 1: + for i, sel in enumerate(selections): + ax[i].set_title(name_selections[i]) + ax[i].set_xlim(xedges[[0, -1]]) + ax[i].set_ylim(yedges[[0, -1]]) + ax[i].set_xticks(np.linspace(xedges[0], xedges[-1], 3)) + ax[i].set_yticks(np.linspace(yedges[0], yedges[-1], 7)) + np.savetxt(names_prefix + f"CDFs_{agg_size}_" + sel.replace(" ", "_") + "_" + rev_type_map[agg_type] + ".txt", cdfs[agg_type][i]) + im = ax[i].imshow(cdfs[agg_type][i], extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], origin='lower', interpolation='bilinear', vmin=minval, vmax=maxval, cmap=plt.cm.RdBu) + ax[i].grid() + ax[i].set_xlabel(r"r (nm)") + ax[i].set_ylabel(r"h (nm)") + else: + # make the last plot blank if the number of selections is odd + if len(selections) % 2 != 0: + ax[-1][-1].axis('off') + for i, sel in enumerate(selections): + idx1, idx2 = i % 2, int(i / 2) + ax[idx1][idx2].set_title(name_selections[i]) + ax[idx1][idx2].set_xlim(xedges[[0, -1]]) + ax[idx1][idx2].set_ylim(yedges[[0, -1]]) + 
ax[idx1][idx2].set_xticks(np.linspace(xedges[0], xedges[-1], 3)) + ax[idx1][idx2].set_yticks(np.linspace(yedges[0], yedges[-1], 7)) + np.savetxt(names_prefix + f"CDFs_{agg_size}_" + sel.replace(" ", "_") + "_" + rev_type_map[agg_type] + ".txt", cdfs[agg_type][i]) + im = ax[idx1][idx2].imshow(cdfs[agg_type][i], extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], origin='lower', interpolation='bilinear', vmin=minval, vmax=maxval, cmap=plt.cm.RdBu) + ax[idx1][idx2].grid() + ax[idx1][idx2].set_xlabel(r"r (nm)") + ax[idx1][idx2].set_ylabel(r"h (nm)") + # Add color bar + cax = fig.add_axes([start_colorbar, 0.15, width_colorbar, 0.7]) + cbar = fig.colorbar(im, cax=cax) + cbar.set_label('CDF') + fig.subplots_adjust(hspace=0.35, right=start_colorbar) + plt.savefig(names_prefix + f"CDFs_{agg_size}_{rev_type_map[agg_type]}.pdf") + plt.show() + plt.close('all') + + # radial + # Normalize the RDF by dividing by the shell volume and number of snapshots and plot + for i, sel in enumerate(selections): + rdfs[agg_type][i] = rdfs[agg_type][i] / (shell_volumes_rdf * count_agg_types[agg_type]) - plt.plot(mid_bin / 10.0, rdfs[i], linewidth=3, label=name_selections[i]) + plt.plot(mid_bin / 10.0, rdfs[agg_type][i], linewidth=3, label=name_selections[i]) - np.savetxt(names_prefix + f"gperp_{agg_size}_" + sel.replace(" ", "_") + ".txt", rdfs[i]) + np.savetxt(names_prefix + f"gperp_{agg_size}_" + sel.replace(" ", "_") + ".txt", rdfs[agg_type][i]) - plt.legend(frameon=False) - plt.xlabel(r"r (nm)") - plt.ylabel(r"g$_\perp$(r)") - plt.savefig(names_prefix + f"radial_RDFs_{agg_size}.pdf", bbox_inches="tight") - plt.show() + plt.legend(frameon=False) + plt.xlabel(r"r (nm)") + plt.ylabel(r"g$_\perp$(r)") + plt.savefig(names_prefix + f"radial_RDFs_{agg_size}_{rev_type_map[agg_type]}.pdf", bbox_inches="tight") + plt.show() # check if rdfs are skipped elif not skip_rdfs: @@ -482,7 +562,7 @@ def compute_rdfs( ) f.write("a = {} A, b = {} A, c = {} A\n\n".format(a, b, c)) - f.write("alpha = {}\n\n".format((2 * Ia - Ib - Ic) / (Ia + Ib + Ic))) + f.write("e = {}\n\n".format(1.0 - np.min([Ia, Ib, Ic]) / np.mean([Ia, Ib, Ic]))) f.write("Rg = {}±{} A\n".format(Rgm, np.std(Rg))) if compute_pddf: From 886d0f8f21ba828440cc8f6d0da4e81287758876 Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Tue, 21 May 2024 11:54:54 +0200 Subject: [PATCH 13/18] Cleanup and add std dev for eccentricity. 
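The per-type average now comes with a spread; the reduction is a plain
boolean mask over the accumulated arrays. A small sketch with made-up data:

    import numpy as np

    agg_types = np.array([0, 2, 0, 1, 2])  # 0=sphere, 1=oblate, 2=prolate
    eccentricities = np.array([0.05, 0.40, 0.10, 0.22, 0.35])

    for k, name in {0: "sphere", 1: "oblate", 2: "prolate"}.items():
        e_type = eccentricities[agg_types == k]
        if e_type.size:
            print(f"{name}: {np.mean(e_type):.4f} ± {np.std(e_type):.4f}")
    # sphere: 0.0750 ± 0.0250
    # oblate: 0.2200 ± 0.0000
    # prolate: 0.3750 ± 0.0250
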
---
 utils/compute_rdf_aggregates.py | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/utils/compute_rdf_aggregates.py b/utils/compute_rdf_aggregates.py
index 45d4424e..a43296d5 100644
--- a/utils/compute_rdf_aggregates.py
+++ b/utils/compute_rdf_aggregates.py
@@ -270,14 +270,7 @@ def compute_rdfs(
             I2 = Lambda[1][1]
             I3 = Lambda[2][2]
 
-            # TODO: based on the three principal moments of inertia we should find
-            # if the aggregate is rod-like, disk-like or sphere-like
-            # and then compute the CDFs accordingly (separating the cases)
-            # this can be done by checking which axis are similar in length and calling them a = b != c
-            # if c/a is close to 1, then it is a sphere
-            # if c/a is greater than 1, then it is prolate
-            # if c/a is smaller than 1, then it is oblate
-            # then we can also output the amount of each type of aggregate
+            # detect the aggregate type
             type_agg = None
             # compute eccentricity
             e = 1.0 - np.min([I1, I2, I3]) / np.mean([I1, I2, I3])
@@ -303,14 +296,6 @@ def compute_rdfs(
             angle = np.degrees(mdamath.angle(pa, [0,0,1]))
             ax = transformations.rotaxis(pa, [0,0,1])
             u.atoms.rotateby(angle, ax, point=cog)
-            # dirname = f"./rotated_cdf_{agg_size}"
-
-            # if not os.path.exists(dirname):
-            #     os.mkdir(dirname)
-
-            # u.atoms.write(
-            #     os.path.join(dirname, f"rotated_{os.path.basename(snapshot)}")
-            # )
 
             # create selections for which CDFs will be computed
             for i, sel_string in enumerate(selections):
@@ -406,7 +391,8 @@ def compute_rdfs(
         with open("summary_compute_rdf_aggregates.txt", "a") as f:
             f.write("\nNumber of aggregates classified by type:\n")
             for k, v in count_agg_types.items():
-                f.write(f"{rev_type_map[k]}: {v} => {100.0 * v/nsnaps:.4f} %. = {np.mean(np.array(eccentricities)[np.array(agg_types) == k]):.4f}\n")
+                e_type = np.array(eccentricities)[np.array(agg_types) == k]
+                f.write(f"{rev_type_map[k]}: {v} => {100.0 * v/nsnaps:.4f} %. <e> = {np.mean(e_type):.4f} ± {np.std(e_type):.4f}\n")
 
         f.write("\nSnapshot number by aggregate type:\n")
         snap_map = np.array(snapshots)

From 2298a7f067d104bbb28caf49a10df2a900c94531 Mon Sep 17 00:00:00 2001
From: Henrique Musseli Cezar
Date: Tue, 21 May 2024 12:01:59 +0200
Subject: [PATCH 14/18] Add agg number to summary file name.

---
 utils/compute_rdf_aggregates.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/utils/compute_rdf_aggregates.py b/utils/compute_rdf_aggregates.py
index a43296d5..482ebeb1 100644
--- a/utils/compute_rdf_aggregates.py
+++ b/utils/compute_rdf_aggregates.py
@@ -388,7 +388,7 @@ def compute_rdfs(
         count_agg_types[i] = np.count_nonzero(np.array(agg_types) == i)
 
     # write results of agg type detection to file
-    with open("summary_compute_rdf_aggregates.txt", "a") as f:
+    with open(f"summary_compute_rdf_aggregates_{agg_size}.txt", "a") as f:
         f.write("\nNumber of aggregates classified by type:\n")
         for k, v in count_agg_types.items():
             e_type = np.array(eccentricities)[np.array(agg_types) == k]
@@ -713,7 +713,7 @@ def compute_rdfs(
     topol = process_topology(args.topol)
 
     # write options to file
-    with open("summary_compute_rdf_aggregates.txt", "w") as f:
+    with open(f"summary_compute_rdf_aggregates_{args.agg_size}.txt", "w") as f:
        f.write("Command: " + " ".join(sys.argv) + "\n")
 
     compute_rdfs(

From 437acaf17aeb8247de8d7b61747c5dafe958419b Mon Sep 17 00:00:00 2001
From: Henrique Musseli Cezar
Date: Tue, 21 May 2024 12:26:25 +0200
Subject: [PATCH 15/18] Add eccentricity threshold.
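The new flag only moves the sphere/non-sphere cutoff applied to the
eccentricity e = 1 - Imin/Imean computed earlier; for hypothetical moments:

    I1, I2, I3 = 10.0, 9.7, 9.0
    e = 1.0 - min(I1, I2, I3) / ((I1 + I2 + I3) / 3.0)  # ~0.059
    print(e < 0.12)  # True  -> sphere at the default threshold
    print(e < 0.05)  # False -> oblate/prolate at a stricter cutoff
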
--- utils/compute_rdf_aggregates.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/utils/compute_rdf_aggregates.py b/utils/compute_rdf_aggregates.py index 482ebeb1..8439e640 100644 --- a/utils/compute_rdf_aggregates.py +++ b/utils/compute_rdf_aggregates.py @@ -70,6 +70,7 @@ def compute_rdfs( names_prefix, start_colorbar, width_colorbar, + eccentricity_threshold, fig_size=(10, 8), fig_size_cdf=(12,12), ): @@ -275,7 +276,7 @@ def compute_rdfs( # compute eccentricity e = 1.0 - np.min([I1, I2, I3]) / np.mean([I1, I2, I3]) eccentricities.append(e) - if e < 0.12: + if e < eccentricity_threshold: type_agg = type_map["sphere"] else: # I3 is the lowest value (biggest axis) @@ -697,6 +698,12 @@ def compute_rdfs( default=0.02, help="width of the colorbar (default = 0.02)", ) + parser.add_argument( + "--eccentricity-threshold", + type=float, + default=0.12, + help="eccentricity threshold to classify the aggregate as a sphere (default = 0.12)", + ) args = parser.parse_args() @@ -738,6 +745,7 @@ def compute_rdfs( args.names_prefix, args.start_colorbar, args.width_colorbar, + args.eccentricity_threshold, tuple(args.fig_size), tuple(args.fig_size_cdf), ) From 4ad4ef5c273fb15db6ac709603eb16b4f58126d0 Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Thu, 23 May 2024 11:44:29 +0200 Subject: [PATCH 16/18] Compute actual RDF for spherical aggregates. --- utils/compute_rdf_aggregates.py | 36 ++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/utils/compute_rdf_aggregates.py b/utils/compute_rdf_aggregates.py index 8439e640..435ed929 100644 --- a/utils/compute_rdf_aggregates.py +++ b/utils/compute_rdf_aggregates.py @@ -323,10 +323,25 @@ def compute_rdfs( ) cdfs[type_agg][i] += cdf * np.prod(box_vectors[:3]) / natoms_normalization - # Compute the RDF using numpy histogram - rdf, bin_edges = np.histogram( - rdist, bins=nbins, range=(0, rmax), density=False - ) + # Compute the RDF using numpy histogram (if sphere it's the actual RDF, not the perpendicular one) + if type_agg == type_map["sphere"]: + # Compute the distances between the target atoms and the center + distances_matrix = distances.distance_array( + cog, at_sel.positions, box=box_vectors, backend="OpenMP" + ) + + # Flatten the distances matrix to a 1D array + flattened_distances = distances_matrix.flatten() + + # Compute the RDF using numpy histogram + rdf, bin_edges = np.histogram( + flattened_distances, bins=nbins, range=(0, rmax), density=False + ) + else: + rdf, bin_edges = np.histogram( + rdist, bins=nbins, range=(0, rmax), density=False + ) + rdfs[type_agg][i] += rdf * np.prod(box_vectors[:3]) / natoms_normalization @@ -378,7 +393,6 @@ def compute_rdfs( np.savetxt(names_prefix + f"{agg_size}_binsL.txt", ycenters) # RDF - shell_volumes_rdf = np.pi * (bin_edges[1:] ** 2 - bin_edges[:-1] ** 2) * 2.0 * lmax # Prepare to plot mid_bin = (bin_edges[:-1] + bin_edges[1:]) / 2.0 np.savetxt(names_prefix + f"gperp_{agg_size}_bins.txt", mid_bin) @@ -463,16 +477,24 @@ def compute_rdfs( # radial # Normalize the RDF by dividing by the shell volume and number of snapshots and plot + if agg_type == type_map["sphere"]: + shell_volumes_rdf = (4.0 / 3.0) * np.pi * (bin_edges[1:] ** 3 - bin_edges[:-1] ** 3) + else: + shell_volumes_rdf = np.pi * (bin_edges[1:] ** 2 - bin_edges[:-1] ** 2) * 2.0 * lmax + for i, sel in enumerate(selections): rdfs[agg_type][i] = rdfs[agg_type][i] / (shell_volumes_rdf * count_agg_types[agg_type]) plt.plot(mid_bin / 10.0, rdfs[agg_type][i], linewidth=3, 
label=name_selections[i]) - np.savetxt(names_prefix + f"gperp_{agg_size}_" + sel.replace(" ", "_") + ".txt", rdfs[agg_type][i]) + np.savetxt(names_prefix + f"gperp_{agg_size}_" + sel.replace(" ", "_") + rev_type_map[agg_type] + ".txt", rdfs[agg_type][i]) plt.legend(frameon=False) plt.xlabel(r"r (nm)") - plt.ylabel(r"g$_\perp$(r)") + if agg_type == type_map["sphere"]: + plt.ylabel(r"g(r)") + else: + plt.ylabel(r"g$_\perp$(r)") plt.savefig(names_prefix + f"radial_RDFs_{agg_size}_{rev_type_map[agg_type]}.pdf", bbox_inches="tight") plt.show() From 956c54cefe50b68f16703dbd1e667c785453619c Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Thu, 5 Sep 2024 14:01:50 +0200 Subject: [PATCH 17/18] New cluster analysis scripts. --- utils/compute_dimension_core_agg.py | 213 ++++++++++++++++++ utils/compute_rdf_aggregates.py | 26 ++- utils/order_parameter_agg.py | 328 ++++++++++++++++++++++++++++ 3 files changed, 561 insertions(+), 6 deletions(-) create mode 100644 utils/compute_dimension_core_agg.py create mode 100644 utils/order_parameter_agg.py diff --git a/utils/compute_dimension_core_agg.py b/utils/compute_dimension_core_agg.py new file mode 100644 index 00000000..4b67a44f --- /dev/null +++ b/utils/compute_dimension_core_agg.py @@ -0,0 +1,213 @@ +import MDAnalysis as mda +from MDAnalysis.lib import distances, mdamath, transformations +from glob import glob +import argparse +from tqdm import tqdm +import os +import numpy as np +import tomli + +def process_topology(topol_path): + with open(topol_path, "rb") as f: + topol = tomli.load(f) + + if "include" in topol["system"]: + for file in topol["system"]["include"]: + path = os.path.join(os.path.dirname(topol_path), file) + with open(path, "rb") as f: + itps = tomli.load(f) + for mol, itp in itps.items(): + topol[mol] = itp + + return topol + +def parse_masses(fname): + if not fname: + return None + + masses = {} + with open(fname, "r") as f: + for line in f: + masses[line.split()[0]] = [float(x) for x in line.split()[1:]] + return masses + + + +def compute_order_parameter(topol, pdbdir, agg_size, core_selection, masses, center, eccentricity_threshold=0.14): + type_map = {"sphere": 0, "oblate": 1, "prolate": 2} + agg_types = [] + I1 = {0: [], 1: [], 2: []} + I2 = {0: [], 1: [], 2: []} + I3 = {0: [], 1: [], 2: []} + + nsnaps = 0 + for snapshot in tqdm(glob(os.path.join(pdbdir, "*.pdb"))): + u = mda.Universe(snapshot) + + # based on topology and residues determine size of aggregates + resids = {} + for residue in u.residues: + nummols = int( + residue.atoms.positions.shape[0] / topol[residue.resname]["atomnum"] + ) + if residue.resid in resids: + resids[residue.resid] += nummols + else: + resids[residue.resid] = nummols + + # check if we can find an aggregate of the given size + agg_resids = [] + for k, v in resids.items(): + if v == agg_size: + agg_resids.append(k) + + # if size was not found, go to next pdb + if len(agg_resids) == 0: + continue + + # for each aggregate of given size, get the order parameter + for ididx, resid in enumerate(agg_resids): + if ididx > 0: + u = mda.Universe(snapshot) + nsnaps += 1 + box_vectors = u.dimensions + agg_sel = u.select_atoms(f"resid {resid}") + + if not center: + center_coord = u.select_atoms(f"resid {resid}").positions + else: + center_coord = u.select_atoms(f"resid {resid} and " + center).positions + + # get the geometric center using the algorithm: + # https://en.wikipedia.org/wiki/Center_of_mass#Systems_with_periodic_boundary_conditions + tetha = (center_coord / box_vectors[:3]) * (2.0 * np.pi) + xi 
= np.cos(tetha) + zeta = np.sin(tetha) + + xi_bar = np.mean(xi, axis=0) + zeta_bar = np.mean(zeta, axis=0) + tetha_bar = np.arctan2(-zeta_bar, -xi_bar) + np.pi + + cog = (box_vectors[:3] * tetha_bar) / (2.0 * np.pi) + + box_center = box_vectors[:3] / 2.0 + u.atoms.translate(box_center - cog) + u.atoms.wrap(compound="atoms") + cog = box_center + + # set the masses + i = 0 + mass_array = [] + while i < len(agg_sel.resnames): + name = agg_sel.resnames[i] + for bmass in masses[name]: + mass_array.append(bmass) + i += len(masses[name]) + + mass_array = np.array(mass_array) + agg_sel.masses = mass_array + + # get moment of inertia and principal axis + I = agg_sel.moment_of_inertia() + UT = agg_sel.principal_axes() # this is already transposed + + # diagonalizes I + Lambda = UT.dot(I.dot(UT.T)) + + Ia = Lambda[0][0] + Ib = Lambda[1][1] + Ic = Lambda[2][2] + + # detect the aggregate type + type_agg = None + # compute eccentricity + e = 1.0 - np.min([Ia, Ib, Ic]) / np.mean([Ia, Ib, Ic]) + if e < eccentricity_threshold: + type_agg = type_map["sphere"] + else: + # Ic is the lowest value (biggest axis) + if np.abs(Ia - Ib) > np.abs(Ib - Ic): + continue # if oblate, skip + # type_agg = type_map["oblate"] + else: + type_agg = type_map["prolate"] + agg_types.append(type_agg) + + # gets symmetry axis + if type_agg == type_map["oblate"]: + pa = UT[0] + else: + pa = UT[2] + + # get the angle between the principal axis and the z-axis and rotate universe + # this does not take into consideration prolate or oblate micelles + angle = np.degrees(mdamath.angle(pa, [0,0,1])) + ax = transformations.rotaxis(pa, [0,0,1]) + u.atoms.rotateby(angle, ax, point=cog) + + # select the core and get dimensions + core = u.select_atoms(f"resid {resid} and {core_selection}") + total_mass = np.sum(core.masses) + + # get moment of inertia and principal axis + I = core.moment_of_inertia() + UT = core.principal_axes() # this is already transposed + + # diagonalizes I + Lambda = UT.dot(I.dot(UT.T)) + + I1[type_agg].append(Lambda[0][0]) + I2[type_agg].append(Lambda[1][1]) + I3[type_agg].append(Lambda[2][2]) + + # for each aggregate type, compute the dimensions + for k, v in type_map.items(): + if len(I1[v]) == 0: + continue + + Ia = np.mean(I1[v]) + Ib = np.mean(I2[v]) + Ic = np.mean(I3[v]) + + a = np.sqrt((5.0 / (2.0 * total_mass)) * (Ib + Ic - Ia)) + b = np.sqrt((5.0 / (2.0 * total_mass)) * (Ia + Ic - Ib)) + c = np.sqrt((5.0 / (2.0 * total_mass)) * (Ia + Ib - Ic)) + + print(f"Aggregate type: {k}") + print(f"Dimensions: a = {a:.4f} A, b = {b:.4f} A, c = {c:.4f} A") + + +if __name__ == "__main__": + description = "Given the directory with all the .pdb generated by aggregates.py compute the dimensions for the core of the aggregates." + parser = argparse.ArgumentParser(description=description) + parser.add_argument("topol", type=str, help=".toml topology files") + parser.add_argument("pdb_dir", type=str, help="directory containing the .pdb") + parser.add_argument( + "agg_size", type=int, help="aggregate size for which the RDFs will be computed" + ) + parser.add_argument( + "core_selection", + type=str, + help='Selection for the core beads. 
Example: "name C2 TC5"', + ) + parser.add_argument( + "masses", + type=str, + default=None, + help="file containing residue name (molecule name) in first column and a column with the mass for each bead", + ) + parser.add_argument( + "--eccentricity-threshold", + type=float, + default=0.14, + help="eccentricity threshold to determine the type of aggregate", + ) + parser.add_argument( + "--center", + type=str, + default=False, + help="selection for the center (if not used the center is the centroid)", + ) + args = parser.parse_args() + topol = process_topology(args.topol) + compute_order_parameter(topol, args.pdb_dir, args.agg_size, args.core_selection, parse_masses(args.masses), args.center, args.eccentricity_threshold) \ No newline at end of file diff --git a/utils/compute_rdf_aggregates.py b/utils/compute_rdf_aggregates.py index 435ed929..2e375bd0 100644 --- a/utils/compute_rdf_aggregates.py +++ b/utils/compute_rdf_aggregates.py @@ -135,10 +135,11 @@ def compute_rdfs( if len(agg_resids) == 0: continue - snapshots.append(os.path.basename(snapshot)) - # for each aggregate of given size, accumulate the RDFs - for resid in agg_resids: + for ididx, resid in enumerate(agg_resids): + if ididx > 0: + u = mda.Universe(snapshot) + snapshots.append(os.path.basename(snapshot)) nsnaps += 1 box_vectors = u.dimensions @@ -167,7 +168,7 @@ def compute_rdfs( u.atoms.wrap(compound="atoms") cog = box_center - if save_agg_only: + if save_agg_only and not compute_cdfs: agg_sel = u.select_atoms(f"resid {resid}") dirname = f"./agg_only_{agg_size}" @@ -176,7 +177,7 @@ def compute_rdfs( os.mkdir(dirname) agg_sel.atoms.write( - os.path.join(dirname, f"centered_{os.path.basename(snapshot)}") + os.path.join(dirname, f"centered_{resid}_{os.path.basename(snapshot)}") ) if save_centered: @@ -186,7 +187,7 @@ def compute_rdfs( os.mkdir(dirname) u.atoms.write( - os.path.join(dirname, f"centered_{os.path.basename(snapshot)}") + os.path.join(dirname, f"centered_{resid}_{os.path.basename(snapshot)}") ) # set the masses @@ -298,6 +299,16 @@ def compute_rdfs( ax = transformations.rotaxis(pa, [0,0,1]) u.atoms.rotateby(angle, ax, point=cog) + if save_agg_only: + dirname = f"./agg_only_{agg_size}" + + if not os.path.exists(dirname): + os.mkdir(dirname) + + agg_sel.atoms.write( + os.path.join(dirname, f"centered_{resid}_{os.path.basename(snapshot)}") + ) + # create selections for which CDFs will be computed for i, sel_string in enumerate(selections): if sel_string.strip().lower() in ["name w", "type w", "name na", "type na", "name cl", "type cl"]: @@ -369,6 +380,9 @@ def compute_rdfs( rdfs[i] += rdf * np.prod(box_vectors[:3]) / natoms_normalization + if len(snapshots) == 0: + raise ValueError(f"No aggregates of size {agg_size} were found in the directory") + plt.rcParams["figure.figsize"] = fig_size plt.rcParams["font.size"] = 22 diff --git a/utils/order_parameter_agg.py b/utils/order_parameter_agg.py new file mode 100644 index 00000000..f0dc55e3 --- /dev/null +++ b/utils/order_parameter_agg.py @@ -0,0 +1,328 @@ +import MDAnalysis as mda +from MDAnalysis.lib import distances, mdamath, transformations +from glob import glob +import argparse +from tqdm import tqdm +import os +import numpy as np +import tomli + +def process_topology(topol_path): + with open(topol_path, "rb") as f: + topol = tomli.load(f) + + if "include" in topol["system"]: + for file in topol["system"]["include"]: + path = os.path.join(os.path.dirname(topol_path), file) + with open(path, "rb") as f: + itps = tomli.load(f) + for mol, itp in itps.items(): + topol[mol] 
= itp + + return topol + +def parse_masses(fname): + if not fname: + return None + + masses = {} + with open(fname, "r") as f: + for line in f: + masses[line.split()[0]] = [float(x) for x in line.split()[1:]] + return masses + + +def get_index_beads(topol, bead1, bead2): + for i, atom in enumerate(topol["atoms"]): + if atom[1] == bead1: + bead1_index = i + elif atom[1] == bead2: + bead2_index = i + return bead1_index, bead2_index + + +def compute_order_parameter(topol, pdbdir, agg_size, bead1, bead2, masses, cylinder_length, center, eccentricity_threshold=0.14): + type_map = {"sphere": 0, "oblate": 1, "prolate": 2} + agg_types = [] + all_order_parameters = [] + all_cosines = [] + all_order_parameters_cylinder = [] + all_cosines_cylinder = [] + all_order_parameters_cap = [] + all_cosines_cap = [] + all_distances = [] + distances_sphere = [] + distances_prolate = [] + all_angles = [] + angles_sphere = [] + angles_prolate = [] + angles_cap = [] + + nsnaps = 0 + for snapshot in tqdm(glob(os.path.join(pdbdir, "*.pdb"))): + u = mda.Universe(snapshot) + + # based on topology and residues determine size of aggregates + resids = {} + for residue in u.residues: + nummols = int( + residue.atoms.positions.shape[0] / topol[residue.resname]["atomnum"] + ) + if residue.resid in resids: + resids[residue.resid] += nummols + else: + resids[residue.resid] = nummols + + # check if we can find an aggregate of the given size + agg_resids = [] + for k, v in resids.items(): + if v == agg_size: + agg_resids.append(k) + + # if size was not found, go to next pdb + if len(agg_resids) == 0: + continue + + # for each aggregate of given size, get the order parameter + for ididx, resid in enumerate(agg_resids): + if ididx > 0: + u = mda.Universe(snapshot) + nsnaps += 1 + box_vectors = u.dimensions + agg_sel = u.select_atoms(f"resid {resid}") + + if not center: + center_coord = u.select_atoms(f"resid {resid}").positions + else: + center_coord = u.select_atoms(f"resid {resid} and " + center).positions + + # get the geometric center using the algorithm: + # https://en.wikipedia.org/wiki/Center_of_mass#Systems_with_periodic_boundary_conditions + tetha = (center_coord / box_vectors[:3]) * (2.0 * np.pi) + xi = np.cos(tetha) + zeta = np.sin(tetha) + + xi_bar = np.mean(xi, axis=0) + zeta_bar = np.mean(zeta, axis=0) + tetha_bar = np.arctan2(-zeta_bar, -xi_bar) + np.pi + + cog = (box_vectors[:3] * tetha_bar) / (2.0 * np.pi) + + box_center = box_vectors[:3] / 2.0 + u.atoms.translate(box_center - cog) + u.atoms.wrap(compound="atoms") + cog = box_center + + # set the masses + i = 0 + mass_array = [] + while i < len(agg_sel.resnames): + name = agg_sel.resnames[i] + for bmass in masses[name]: + mass_array.append(bmass) + i += len(masses[name]) + + mass_array = np.array(mass_array) + agg_sel.masses = mass_array + total_mass = np.sum(mass_array) + + # get moment of inertia and principal axis + I = agg_sel.moment_of_inertia() + UT = agg_sel.principal_axes() # this is already transposed + + # diagonalizes I + Lambda = UT.dot(I.dot(UT.T)) + + I1 = Lambda[0][0] + I2 = Lambda[1][1] + I3 = Lambda[2][2] + + # detect the aggregate type + type_agg = None + # compute eccentricity + e = 1.0 - np.min([I1, I2, I3]) / np.mean([I1, I2, I3]) + if e < eccentricity_threshold: + type_agg = type_map["sphere"] + else: + # I3 is the lowest value (biggest axis) + if np.abs(I1 - I2) > np.abs(I2 - I3): + continue # if oblate, skip + # type_agg = type_map["oblate"] + else: + type_agg = type_map["prolate"] + agg_types.append(type_agg) + + # gets symmetry axis + if 
type_agg == type_map["oblate"]: + pa = UT[0] + else: + pa = UT[2] + + # get the angle between the principal axis and the z-axis and rotate universe + # this does not take into consideration prolate or oblate micelles + angle = np.degrees(mdamath.angle(pa, [0,0,1])) + ax = transformations.rotaxis(pa, [0,0,1]) + u.atoms.rotateby(angle, ax, point=cog) + + # iterate over all molecules computing the order parameter + molecule_order_parameters = [] + simple_cosine = [] + cylinder_order_parameters = [] + cylinder_cosine = [] + cap_order_parameters = [] + cap_cosine = [] + for mol in agg_sel.residues: + nmols_residue = int(mol.atoms.positions.shape[0] / topol[mol.resname]["atomnum"]) + for i in range(nmols_residue): + mol_at_positions = mol.atoms.positions[i * topol[mol.resname]["atomnum"] : (i + 1) * topol[mol.resname]["atomnum"]] - cog + # get positions of bead only + bead1_index, bead2_index = get_index_beads(topol[mol.resname], bead1, bead2) + orient_vec = mol_at_positions[bead2_index] - mol_at_positions[bead1_index] + all_distances.append(np.linalg.norm(orient_vec)) + + if type_agg == type_map["sphere"]: + distances_sphere.append(np.linalg.norm(orient_vec)) + cm_vec = mol_at_positions[bead1_index] + + # compute order parameter + cosine = np.dot(cm_vec, orient_vec) / (np.linalg.norm(cm_vec) * np.linalg.norm(orient_vec)) + molecule_order_parameters.append(0.5 * (3.0 * cosine * cosine - 1.0)) + simple_cosine.append(cosine) + angles_sphere.append(np.arccos(cosine) * 180.0 / np.pi) + + elif type_agg == type_map["prolate"]: + distances_prolate.append(np.linalg.norm(orient_vec)) + # find if the molecule is in the cylindrical or cap regions + if mol_at_positions[bead1_index][2] > cylinder_length: # top cap + cm_vec = mol_at_positions[bead1_index] + cm_vec[2] -= cylinder_length + + cosine = np.dot(cm_vec, orient_vec) / (np.linalg.norm(cm_vec) * np.linalg.norm(orient_vec)) + cap_order_parameters.append(0.5 * (3.0 * cosine * cosine - 1.0)) + cap_cosine.append(cosine) + angles_cap.append(np.arccos(cosine) * 180.0 / np.pi) + elif mol_at_positions[bead1_index][2] < -cylinder_length: # bottom cap + cm_vec = mol_at_positions[bead1_index] + cm_vec[2] += cylinder_length + + cosine = np.dot(cm_vec, orient_vec) / (np.linalg.norm(cm_vec) * np.linalg.norm(orient_vec)) + cap_order_parameters.append(0.5 * (3.0 * cosine * cosine - 1.0)) + cap_cosine.append(cosine) + angles_cap.append(np.arccos(cosine) * 180.0 / np.pi) + else: # cylinder part + cm_vec = mol_at_positions[bead1_index] + cm_vec[2] = 0.0 + + cosine = np.dot(cm_vec, orient_vec) / (np.linalg.norm(cm_vec) * np.linalg.norm(orient_vec)) + cylinder_order_parameters.append(0.5 * (3.0 * cosine * cosine - 1.0)) + cylinder_cosine.append(cosine) + angles_prolate.append(np.arccos(cosine) * 180.0 / np.pi) + + + all_angles.append(np.arccos(cosine) * 180.0 / np.pi) + + if type_agg == type_map["sphere"]: + all_order_parameters.append(np.mean(molecule_order_parameters)) + all_cosines.append(np.mean(simple_cosine)) + elif type_agg == type_map["prolate"]: + all_order_parameters_cylinder.append(np.mean(cylinder_order_parameters)) + all_cosines_cylinder.append(np.mean(cylinder_cosine)) + all_order_parameters_cap.append(np.mean(cap_order_parameters)) + all_cosines_cap.append(np.mean(cap_cosine)) + + print(f"Aggregate size: {agg_size}") + print(f"Eccentricity threshold: {eccentricity_threshold}") + print(f"Prolate cylinder half length: {cylinder_length}\n") + + print(f"Percentage of spheres: {agg_types.count(type_map['sphere']) / nsnaps}") + print(f"Percentage of oblates: 
{agg_types.count(type_map['oblate']) / nsnaps}") + print(f"Percentage of prolates: {agg_types.count(type_map['prolate']) / nsnaps}\n") + + print(f"Mean distance between beads: {np.mean(all_distances)} ± {np.std(all_distances)}") + print(f"Mean distance between beads (sphere): {np.mean(distances_sphere)} ± {np.std(distances_sphere)}") + print(f"Mean distance between beads (prolate): {np.mean(distances_prolate)} ± {np.std(distances_prolate)}\n") + + print(f"Mean order parameter (sphere): {np.mean(all_order_parameters)} ± {np.std(all_order_parameters)}") + print(f"Mean cosine (sphere): {np.mean(all_cosines)} ± {np.std(all_cosines)}") + print(f"Mean order parameter (cylinder): {np.mean(all_order_parameters_cylinder)} ± {np.std(all_order_parameters_cylinder)}") + print(f"Mean cosine (cylinder): {np.mean(all_cosines_cylinder)} ± {np.std(all_cosines_cylinder)}") + print(f"Mean order parameter (cap): {np.mean(all_order_parameters_cap)} ± {np.std(all_order_parameters_cap)}") + print(f"Mean cosine (cap): {np.mean(all_cosines_cap)} ± {np.std(all_cosines_cap)}\n") + + print(f"Mean angle (all): {np.mean(all_angles)} ± {np.std(all_angles)}") + + import matplotlib.pyplot as plt + plt.rcParams["figure.figsize"] = (10, 8) + plt.rcParams["font.size"] = 22 + + # Plotting distributions + angles = [all_angles, angles_cap, angles_prolate, angles_sphere] + labels = ['All Angles', 'Cap Angles', 'Prolate Angles', 'Sphere Angles'] + colors = ['C0', 'C1', 'C2', 'C3'] + counts = [len(angle_list) for angle_list in angles] + sorted_angles = [angle_list for _, angle_list in sorted(zip(counts, angles), reverse=True)] + sorted_labels = [label for _, label in sorted(zip(counts, labels), reverse=True)] + sorted_colors = [color for _, color in sorted(zip(counts, colors), reverse=True)] + + for angle_list, label, color in zip(sorted_angles, sorted_labels, sorted_colors): + plt.hist(angle_list, bins=50, alpha=0.5, label=label, color=color) + + # Adding labels and legend + plt.xlabel(r'Angle ($\degree$)') + plt.ylabel('Frequency') + plt.legend() + + # Displaying the plot + plt.savefig(f"distr_angles_surface_{agg_size}.pdf", bbox_inches='tight') + plt.show() + + +if __name__ == "__main__": + description = "Given the directory with all the .pdb generated by aggregates.py compute the order parameters for each aggregate type" + parser = argparse.ArgumentParser(description=description) + parser.add_argument("topol", type=str, help=".toml topology files") + parser.add_argument("pdb_dir", type=str, help="directory containing the .pdb") + parser.add_argument( + "agg_size", type=int, help="aggregate size for which the RDFs will be computed" + ) + parser.add_argument( + "bead1", + type=str, + help='name of first bead in the order parameter calculation. Example: "TN2a"', + ) + parser.add_argument( + "bead2", + type=str, + help='name of second bead in the order parameter calculation. 
Example: "TP1"', + ) + parser.add_argument( + "masses", + type=str, + default=None, + help="file containing residue name (molecule name) in first column and a column with the mass for each bead", + ) + parser.add_argument( + "--cylinder-length", + type=float, + default=0.0, + help="HALF length of the cylinder (not considering the cap) to compute the order parameter", + ) + parser.add_argument( + "--eccentricity-threshold", + type=float, + default=0.14, + help="eccentricity threshold to determine the type of aggregate", + ) + parser.add_argument( + "--center", + type=str, + default=False, + help="selection for the center (if not used the center is the centroid)", + ) + + args = parser.parse_args() + + topol = process_topology(args.topol) + + compute_order_parameter(topol, args.pdb_dir, args.agg_size, args.bead1, args.bead2, parse_masses(args.masses), args.cylinder_length, args.center, args.eccentricity_threshold) + From b136a28b0fe3f1b2b1f88320b96164b8eedeca0e Mon Sep 17 00:00:00 2001 From: Henrique Musseli Cezar Date: Thu, 5 Sep 2024 14:31:26 +0200 Subject: [PATCH 18/18] Alllow mpirun to oversubscribe to avoid CI errors. --- pytest-mpi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pytest-mpi b/pytest-mpi index b661bf84..97a817ce 100755 --- a/pytest-mpi +++ b/pytest-mpi @@ -82,7 +82,7 @@ else fi if [ "${ORDEROUTPUT}" -eq 1 ]; then - mpirun -n ${NPROCS} --output-filename outtest pytest ${PYTEST_ARGS} --only-mpi >/dev/null + mpirun -n ${NPROCS} --oversubscribe --output-filename outtest pytest ${PYTEST_ARGS} --only-mpi >/dev/null exit_code=$? for ((RANK=0; RANK<${NPROCS}; RANK++)); do RANKFILE="outtest/1/rank.${RANK}/stdout" @@ -94,10 +94,10 @@ else done rm -r outtest elif [ "${UNMUTE}" -eq -1 ]; then - mpirun -n ${NPROCS} pytest ${PYTEST_ARGS} --only-mpi + mpirun -n ${NPROCS} --oversubscribe pytest ${PYTEST_ARGS} --only-mpi exit_code=$? else - mpirun -n ${NPROCS} utils/mute_all_ranks_except.sh ${UNMUTE} pytest ${PYTEST_ARGS} --only-mpi + mpirun -n ${NPROCS} --oversubscribe utils/mute_all_ranks_except.sh ${UNMUTE} pytest ${PYTEST_ARGS} --only-mpi exit_code=$? fi fi
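For reference, the per-molecule quantity averaged by
order_parameter_agg.py above is the second Legendre polynomial
P2(cos theta) of the angle between the molecular orientation vector
(bead1 -> bead2) and the local radial vector cm_vec (aggregate center
-> bead1; for prolate aggregates the z component of cm_vec is zeroed in
the cylindrical body and shifted by the half length in the caps). A
minimal, self-contained sketch of that kernel, with an invented
function name and the two vectors assumed to be already expressed in
the centered, rotated aggregate frame as the script does:

import numpy as np

def p2_order_parameter(cm_vec, orient_vec):
    # Cosine of the angle between the local radial direction and the
    # molecular orientation vector.
    cosine = np.dot(cm_vec, orient_vec) / (
        np.linalg.norm(cm_vec) * np.linalg.norm(orient_vec)
    )
    # P2(cos theta) = (3 cos^2 theta - 1) / 2:
    # +1.0 for a perfectly radial molecule, -0.5 for a tangential one,
    # and 0 on average for an isotropic distribution.
    return 0.5 * (3.0 * cosine * cosine - 1.0)

print(p2_order_parameter(np.array([0.0, 0.0, 3.0]),
                         np.array([0.0, 0.0, 1.5])))  # 1.0, radial

Finally, the --oversubscribe flag added to the three mpirun invocations
in pytest-mpi is the standard Open MPI option that allows launching
more ranks than there are available slots, so the MPI test suite no
longer aborts on CI runners that have fewer cores than the requested
${NPROCS}.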