diff --git a/utils/compute_dimension_core_agg.py b/utils/compute_dimension_core_agg.py new file mode 100644 index 00000000..4b67a44f --- /dev/null +++ b/utils/compute_dimension_core_agg.py @@ -0,0 +1,213 @@ +import MDAnalysis as mda +from MDAnalysis.lib import distances, mdamath, transformations +from glob import glob +import argparse +from tqdm import tqdm +import os +import numpy as np +import tomli + +def process_topology(topol_path): + with open(topol_path, "rb") as f: + topol = tomli.load(f) + + if "include" in topol["system"]: + for file in topol["system"]["include"]: + path = os.path.join(os.path.dirname(topol_path), file) + with open(path, "rb") as f: + itps = tomli.load(f) + for mol, itp in itps.items(): + topol[mol] = itp + + return topol + +def parse_masses(fname): + if not fname: + return None + + masses = {} + with open(fname, "r") as f: + for line in f: + masses[line.split()[0]] = [float(x) for x in line.split()[1:]] + return masses + + + +def compute_order_parameter(topol, pdbdir, agg_size, core_selection, masses, center, eccentricity_threshold=0.14): + type_map = {"sphere": 0, "oblate": 1, "prolate": 2} + agg_types = [] + I1 = {0: [], 1: [], 2: []} + I2 = {0: [], 1: [], 2: []} + I3 = {0: [], 1: [], 2: []} + + nsnaps = 0 + for snapshot in tqdm(glob(os.path.join(pdbdir, "*.pdb"))): + u = mda.Universe(snapshot) + + # based on topology and residues determine size of aggregates + resids = {} + for residue in u.residues: + nummols = int( + residue.atoms.positions.shape[0] / topol[residue.resname]["atomnum"] + ) + if residue.resid in resids: + resids[residue.resid] += nummols + else: + resids[residue.resid] = nummols + + # check if we can find an aggregate of the given size + agg_resids = [] + for k, v in resids.items(): + if v == agg_size: + agg_resids.append(k) + + # if size was not found, go to next pdb + if len(agg_resids) == 0: + continue + + # for each aggregate of given size, get the order parameter + for ididx, resid in enumerate(agg_resids): + if ididx > 0: + u = mda.Universe(snapshot) + nsnaps += 1 + box_vectors = u.dimensions + agg_sel = u.select_atoms(f"resid {resid}") + + if not center: + center_coord = u.select_atoms(f"resid {resid}").positions + else: + center_coord = u.select_atoms(f"resid {resid} and " + center).positions + + # get the geometric center using the algorithm: + # https://en.wikipedia.org/wiki/Center_of_mass#Systems_with_periodic_boundary_conditions + tetha = (center_coord / box_vectors[:3]) * (2.0 * np.pi) + xi = np.cos(tetha) + zeta = np.sin(tetha) + + xi_bar = np.mean(xi, axis=0) + zeta_bar = np.mean(zeta, axis=0) + tetha_bar = np.arctan2(-zeta_bar, -xi_bar) + np.pi + + cog = (box_vectors[:3] * tetha_bar) / (2.0 * np.pi) + + box_center = box_vectors[:3] / 2.0 + u.atoms.translate(box_center - cog) + u.atoms.wrap(compound="atoms") + cog = box_center + + # set the masses + i = 0 + mass_array = [] + while i < len(agg_sel.resnames): + name = agg_sel.resnames[i] + for bmass in masses[name]: + mass_array.append(bmass) + i += len(masses[name]) + + mass_array = np.array(mass_array) + agg_sel.masses = mass_array + + # get moment of inertia and principal axis + I = agg_sel.moment_of_inertia() + UT = agg_sel.principal_axes() # this is already transposed + + # diagonalizes I + Lambda = UT.dot(I.dot(UT.T)) + + Ia = Lambda[0][0] + Ib = Lambda[1][1] + Ic = Lambda[2][2] + + # detect the aggregate type + type_agg = None + # compute eccentricity + e = 1.0 - np.min([Ia, Ib, Ic]) / np.mean([Ia, Ib, Ic]) + if e < eccentricity_threshold: + type_agg = type_map["sphere"] + else: + # Ic is the lowest value (biggest axis) + if np.abs(Ia - Ib) > np.abs(Ib - Ic): + continue # if oblate, skip + # type_agg = type_map["oblate"] + else: + type_agg = type_map["prolate"] + agg_types.append(type_agg) + + # gets symmetry axis + if type_agg == type_map["oblate"]: + pa = UT[0] + else: + pa = UT[2] + + # get the angle between the principal axis and the z-axis and rotate universe + # this does not take into consideration prolate or oblate micelles + angle = np.degrees(mdamath.angle(pa, [0,0,1])) + ax = transformations.rotaxis(pa, [0,0,1]) + u.atoms.rotateby(angle, ax, point=cog) + + # select the core and get dimensions + core = u.select_atoms(f"resid {resid} and {core_selection}") + total_mass = np.sum(core.masses) + + # get moment of inertia and principal axis + I = core.moment_of_inertia() + UT = core.principal_axes() # this is already transposed + + # diagonalizes I + Lambda = UT.dot(I.dot(UT.T)) + + I1[type_agg].append(Lambda[0][0]) + I2[type_agg].append(Lambda[1][1]) + I3[type_agg].append(Lambda[2][2]) + + # for each aggregate type, compute the dimensions + for k, v in type_map.items(): + if len(I1[v]) == 0: + continue + + Ia = np.mean(I1[v]) + Ib = np.mean(I2[v]) + Ic = np.mean(I3[v]) + + a = np.sqrt((5.0 / (2.0 * total_mass)) * (Ib + Ic - Ia)) + b = np.sqrt((5.0 / (2.0 * total_mass)) * (Ia + Ic - Ib)) + c = np.sqrt((5.0 / (2.0 * total_mass)) * (Ia + Ib - Ic)) + + print(f"Aggregate type: {k}") + print(f"Dimensions: a = {a:.4f} A, b = {b:.4f} A, c = {c:.4f} A") + + +if __name__ == "__main__": + description = "Given the directory with all the .pdb generated by aggregates.py compute the dimensions for the core of the aggregates." + parser = argparse.ArgumentParser(description=description) + parser.add_argument("topol", type=str, help=".toml topology files") + parser.add_argument("pdb_dir", type=str, help="directory containing the .pdb") + parser.add_argument( + "agg_size", type=int, help="aggregate size for which the RDFs will be computed" + ) + parser.add_argument( + "core_selection", + type=str, + help='Selection for the core beads. Example: "name C2 TC5"', + ) + parser.add_argument( + "masses", + type=str, + default=None, + help="file containing residue name (molecule name) in first column and a column with the mass for each bead", + ) + parser.add_argument( + "--eccentricity-threshold", + type=float, + default=0.14, + help="eccentricity threshold to determine the type of aggregate", + ) + parser.add_argument( + "--center", + type=str, + default=False, + help="selection for the center (if not used the center is the centroid)", + ) + args = parser.parse_args() + topol = process_topology(args.topol) + compute_order_parameter(topol, args.pdb_dir, args.agg_size, args.core_selection, parse_masses(args.masses), args.center, args.eccentricity_threshold) \ No newline at end of file diff --git a/utils/compute_rdf_aggregates.py b/utils/compute_rdf_aggregates.py index 0ff7ec41..2e375bd0 100644 --- a/utils/compute_rdf_aggregates.py +++ b/utils/compute_rdf_aggregates.py @@ -70,15 +70,28 @@ def compute_rdfs( names_prefix, start_colorbar, width_colorbar, + eccentricity_threshold, fig_size=(10, 8), fig_size_cdf=(12,12), ): # initialize rdfs = {} cdfs = {} - for i in range(len(selections)): - rdfs[i] = np.zeros(nbins) - cdfs[i] = np.zeros((nbinsl, nbins)) + if compute_cdfs: + type_map = {"sphere": 0, "oblate": 1, "prolate": 2} + rev_type_map = {v: k for k, v in type_map.items()} + agg_types = [] + eccentricities = [] + + for k, v in type_map.items(): + cdfs[v] = {} + rdfs[v] = {} + for i in range(len(selections)): + cdfs[v][i] = np.zeros((nbinsl, nbins)) + rdfs[v][i] = np.zeros(nbins) + else: + for i in range(len(selections)): + rdfs[i] = np.zeros(nbins) if compute_i_rg: I1 = [] I2 = [] @@ -94,13 +107,13 @@ def compute_rdfs( names_prefix = "" nsnaps = 0 + snapshots = [] for snapshot in tqdm(glob(os.path.join(pdbdir, "*.pdb"))): u = mda.Universe(snapshot) if not natoms_normalization: natoms_normalization = len(u.atoms) - # based on topology and residues determine size of aggregates resids = {} for residue in u.residues: @@ -123,7 +136,10 @@ def compute_rdfs( continue # for each aggregate of given size, accumulate the RDFs - for resid in agg_resids: + for ididx, resid in enumerate(agg_resids): + if ididx > 0: + u = mda.Universe(snapshot) + snapshots.append(os.path.basename(snapshot)) nsnaps += 1 box_vectors = u.dimensions @@ -152,7 +168,7 @@ def compute_rdfs( u.atoms.wrap(compound="atoms") cog = box_center - if save_agg_only: + if save_agg_only and not compute_cdfs: agg_sel = u.select_atoms(f"resid {resid}") dirname = f"./agg_only_{agg_size}" @@ -161,7 +177,7 @@ def compute_rdfs( os.mkdir(dirname) agg_sel.atoms.write( - os.path.join(dirname, f"centered_{os.path.basename(snapshot)}") + os.path.join(dirname, f"centered_{resid}_{os.path.basename(snapshot)}") ) if save_centered: @@ -171,7 +187,7 @@ def compute_rdfs( os.mkdir(dirname) u.atoms.write( - os.path.join(dirname, f"centered_{os.path.basename(snapshot)}") + os.path.join(dirname, f"centered_{resid}_{os.path.basename(snapshot)}") ) # set the masses @@ -245,21 +261,53 @@ def compute_rdfs( # first align the principal axis of the aggregate with the z-axis agg_sel = u.select_atoms(f"resid {resid}") - # get principal axis - pa = agg_sel.principal_axes()[2] + # get moment of inertia and principal axis + I = agg_sel.moment_of_inertia() + UT = agg_sel.principal_axes() # this is already transposed + + # diagonalizes I + Lambda = UT.dot(I.dot(UT.T)) + + I1 = Lambda[0][0] + I2 = Lambda[1][1] + I3 = Lambda[2][2] + + # detect the aggregate type + type_agg = None + # compute eccentricity + e = 1.0 - np.min([I1, I2, I3]) / np.mean([I1, I2, I3]) + eccentricities.append(e) + if e < eccentricity_threshold: + type_agg = type_map["sphere"] + else: + # I3 is the lowest value (biggest axis) + if np.abs(I1 - I2) > np.abs(I2 - I3): + type_agg = type_map["oblate"] + else: + type_agg = type_map["prolate"] + agg_types.append(type_agg) + + # gets symmetry axis + if type_agg == type_map["oblate"]: + pa = UT[0] + else: + pa = UT[2] # get the angle between the principal axis and the z-axis and rotate universe + # this does not take into consideration prolate or oblate micelles angle = np.degrees(mdamath.angle(pa, [0,0,1])) ax = transformations.rotaxis(pa, [0,0,1]) u.atoms.rotateby(angle, ax, point=cog) - # dirname = f"./rotated_cdf_{agg_size}" - # if not os.path.exists(dirname): - # os.mkdir(dirname) + if save_agg_only: + dirname = f"./agg_only_{agg_size}" - # u.atoms.write( - # os.path.join(dirname, f"rotated_{os.path.basename(snapshot)}") - # ) + if not os.path.exists(dirname): + os.mkdir(dirname) + + agg_sel.atoms.write( + os.path.join(dirname, f"centered_{resid}_{os.path.basename(snapshot)}") + ) # create selections for which CDFs will be computed for i, sel_string in enumerate(selections): @@ -268,7 +316,6 @@ def compute_rdfs( else: at_sel = u.select_atoms(f"resid {resid} and " + sel_string) - # Compute the distances between the target atoms and the center distances_matrix = at_sel.positions - cog @@ -285,13 +332,28 @@ def compute_rdfs( cdf, yedges, xedges = np.histogram2d( zdist, rdist, bins=[nbinsl, nbins], range=[[-lmax, lmax], [0, rmax]], density=False ) - cdfs[i] += cdf * np.prod(box_vectors[:3]) / natoms_normalization - - # Compute the RDF using numpy histogram - rdf, bin_edges = np.histogram( - rdist, bins=nbins, range=(0, rmax), density=False - ) - rdfs[i] += rdf * np.prod(box_vectors[:3]) / natoms_normalization + cdfs[type_agg][i] += cdf * np.prod(box_vectors[:3]) / natoms_normalization + + # Compute the RDF using numpy histogram (if sphere it's the actual RDF, not the perpendicular one) + if type_agg == type_map["sphere"]: + # Compute the distances between the target atoms and the center + distances_matrix = distances.distance_array( + cog, at_sel.positions, box=box_vectors, backend="OpenMP" + ) + + # Flatten the distances matrix to a 1D array + flattened_distances = distances_matrix.flatten() + + # Compute the RDF using numpy histogram + rdf, bin_edges = np.histogram( + flattened_distances, bins=nbins, range=(0, rmax), density=False + ) + else: + rdf, bin_edges = np.histogram( + rdist, bins=nbins, range=(0, rmax), density=False + ) + + rdfs[type_agg][i] += rdf * np.prod(box_vectors[:3]) / natoms_normalization elif not skip_rdfs: @@ -318,16 +380,24 @@ def compute_rdfs( rdfs[i] += rdf * np.prod(box_vectors[:3]) / natoms_normalization + if len(snapshots) == 0: + raise ValueError(f"No aggregates of size {agg_size} were found in the directory") + plt.rcParams["figure.figsize"] = fig_size plt.rcParams["font.size"] = 22 if compute_cdfs: - # compute the volume of each shell - # shell_volumes = np.zeros((np.size(yedges) - 1, np.size(xedges) - 1)) - # for n in range(0, np.size(yedges) -1): - # for m in range(0, np.size(xedges) - 1): - # shell_volumes[n, m] = ((2 * np.pi * (xedges[m + 1]) ** 2) - (2 * np.pi * (xedges[m - 1]) ** 2)) * np.abs(yedges[2] - yedges[1]) - shell_volumes = np.pi * (xedges[1:] ** 2 - xedges[:-1] ** 2) * np.abs(yedges[1] - yedges[0]) + # plot evolution of the aggregate types + fig, ax = plt.subplots() + ax.plot(np.arange(len(agg_types)) , agg_types) + ax.set_yticks([0, 1, 2]) + ax.set_yticklabels(["sphere", "oblate", "prolate"]) + plt.savefig(names_prefix + f"agg_type_evolution_{agg_size}.pdf") + plt.show() + plt.close('all') + + # CDF + shell_volumes_cdf = np.pi * (xedges[1:] ** 2 - xedges[:-1] ** 2) * np.abs(yedges[1] - yedges[0]) xedges /= 10.0 yedges /= 10.0 # prepare to plot @@ -336,78 +406,111 @@ def compute_rdfs( np.savetxt(names_prefix + f"{agg_size}_binsR.txt", xcenters) np.savetxt(names_prefix + f"{agg_size}_binsL.txt", ycenters) - # normalize CDFs and find the lower and largest values - minval = np.inf - maxval = -np.inf - for i in range(len(selections)): - cdfs[i] = cdfs[i] / (shell_volumes * nsnaps) - if minval > np.min(cdfs[i]): - minval = np.min(cdfs[i]) - if maxval < np.max(cdfs[i]): - maxval = np.max(cdfs[i]) - - # normalize the CDF by dividing by the shell volume and number of snapshots and plot - fig, ax = plt.subplots(nrows=2, ncols=int(np.ceil(len(selections)/2)), figsize=fig_size_cdf) - fig.tight_layout(w_pad=0.4, h_pad=1.8) - for a in ax.flatten(): - a.set_aspect(5/10) - if int(np.ceil(len(selections)/2)) == 1: - for i, sel in enumerate(selections): - ax[i].set_title(name_selections[i]) - ax[i].set_xlim(xedges[[0, -1]]) - ax[i].set_ylim(yedges[[0, -1]]) - ax[i].set_xticks(np.linspace(xedges[0], xedges[-1], 3)) - ax[i].set_yticks(np.linspace(yedges[0], yedges[-1], 7)) - np.savetxt(names_prefix + f"CDFs_{agg_size}_" + sel.replace(" ", "_") + ".txt", cdfs[i]) - im = ax[i].imshow(cdfs[i], extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], origin='lower', interpolation='bilinear', vmin=minval, vmax=maxval, cmap=plt.cm.RdBu) - ax[i].grid() - ax[i].set_xlabel(r"r (nm)") - ax[i].set_ylabel(r"h (nm)") - else: - # make the last plot blank if the number of selections is odd - if len(selections) % 2 != 0: - ax[-1][-1].axis('off') - for i, sel in enumerate(selections): - idx1, idx2 = i % 2, int(i / 2) - ax[idx1][idx2].set_title(name_selections[i]) - ax[idx1][idx2].set_xlim(xedges[[0, -1]]) - ax[idx1][idx2].set_ylim(yedges[[0, -1]]) - ax[idx1][idx2].set_xticks(np.linspace(xedges[0], xedges[-1], 3)) - ax[idx1][idx2].set_yticks(np.linspace(yedges[0], yedges[-1], 7)) - np.savetxt(names_prefix + f"CDFs_{agg_size}_" + sel.replace(" ", "_") + ".txt", cdfs[i]) - im = ax[idx1][idx2].imshow(cdfs[i], extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], origin='lower', interpolation='bilinear', vmin=minval, vmax=maxval, cmap=plt.cm.RdBu) - ax[idx1][idx2].grid() - ax[idx1][idx2].set_xlabel(r"r (nm)") - ax[idx1][idx2].set_ylabel(r"h (nm)") - # Add color bar - cax = fig.add_axes([start_colorbar, 0.15, width_colorbar, 0.7]) - cbar = fig.colorbar(im, cax=cax) - cbar.set_label('CDF') - fig.subplots_adjust(hspace=0.35, right=start_colorbar) - plt.savefig(names_prefix + f"CDFs_{agg_size}.pdf") - plt.show() - plt.close('all') - - # radial - # shell_volumes = np.pi * (bin_edges[1:] ** 2 - bin_edges[:-1] ** 2) * box_vectors[2] - shell_volumes = np.pi * (bin_edges[1:] ** 2 - bin_edges[:-1] ** 2) * 2.0 * lmax + # RDF # Prepare to plot mid_bin = (bin_edges[:-1] + bin_edges[1:]) / 2.0 np.savetxt(names_prefix + f"gperp_{agg_size}_bins.txt", mid_bin) - # Normalize the RDF by dividing by the shell volume and number of snapshots and plot - for i, sel in enumerate(selections): - rdfs[i] = rdfs[i] / (shell_volumes * nsnaps) + # count agg by type and write to file + count_agg_types = {} + for i in type_map.values(): + count_agg_types[i] = np.count_nonzero(np.array(agg_types) == i) + + # write results of agg type detection to file + with open(f"summary_compute_rdf_aggregates_{agg_size}.txt", "a") as f: + f.write("\nNumber of aggregates classified by type:\n") + for k, v in count_agg_types.items(): + e_type = np.array(eccentricities)[np.array(agg_types) == k] + f.write(f"{rev_type_map[k]}: {v} => {100.0 * v/nsnaps:.4f} %. = {np.mean(e_type):.4f} ± {np.std(e_type):.4f} \n") + + f.write("\nSnapshot number by aggregate type:\n") + snap_map = np.array(snapshots) + for k, v in type_map.items(): + f.write(f"{k}: ") + for snap in snap_map[np.array(agg_types) == v]: + f.write(f"{snap}, ") + f.write("\n") + + # for each type of aggregate, normalize the CDFs/RDFs and plot + for agg_type in type_map.values(): + # skip if there are no CDFs to plot + if count_agg_types[agg_type] == 0: + continue + + # normalize CDFs and find the lower and largest values + minval = np.inf + maxval = -np.inf + for i in range(len(selections)): + cdfs[agg_type][i] = cdfs[agg_type][i] / (shell_volumes_cdf * count_agg_types[agg_type]) + if minval > np.min(cdfs[agg_type][i]): + minval = np.min(cdfs[agg_type][i]) + if maxval < np.max(cdfs[agg_type][i]): + maxval = np.max(cdfs[agg_type][i]) + + # normalize the CDF by dividing by the shell volume and number of snapshots and plot + fig, ax = plt.subplots(nrows=2, ncols=int(np.ceil(len(selections)/2)), figsize=fig_size_cdf) + fig.tight_layout(w_pad=0.4, h_pad=1.8) + for a in ax.flatten(): + a.set_aspect(5/10) + if int(np.ceil(len(selections)/2)) == 1: + for i, sel in enumerate(selections): + ax[i].set_title(name_selections[i]) + ax[i].set_xlim(xedges[[0, -1]]) + ax[i].set_ylim(yedges[[0, -1]]) + ax[i].set_xticks(np.linspace(xedges[0], xedges[-1], 3)) + ax[i].set_yticks(np.linspace(yedges[0], yedges[-1], 7)) + np.savetxt(names_prefix + f"CDFs_{agg_size}_" + sel.replace(" ", "_") + "_" + rev_type_map[agg_type] + ".txt", cdfs[agg_type][i]) + im = ax[i].imshow(cdfs[agg_type][i], extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], origin='lower', interpolation='bilinear', vmin=minval, vmax=maxval, cmap=plt.cm.RdBu) + ax[i].grid() + ax[i].set_xlabel(r"r (nm)") + ax[i].set_ylabel(r"h (nm)") + else: + # make the last plot blank if the number of selections is odd + if len(selections) % 2 != 0: + ax[-1][-1].axis('off') + for i, sel in enumerate(selections): + idx1, idx2 = i % 2, int(i / 2) + ax[idx1][idx2].set_title(name_selections[i]) + ax[idx1][idx2].set_xlim(xedges[[0, -1]]) + ax[idx1][idx2].set_ylim(yedges[[0, -1]]) + ax[idx1][idx2].set_xticks(np.linspace(xedges[0], xedges[-1], 3)) + ax[idx1][idx2].set_yticks(np.linspace(yedges[0], yedges[-1], 7)) + np.savetxt(names_prefix + f"CDFs_{agg_size}_" + sel.replace(" ", "_") + "_" + rev_type_map[agg_type] + ".txt", cdfs[agg_type][i]) + im = ax[idx1][idx2].imshow(cdfs[agg_type][i], extent=[xedges[0], xedges[-1], yedges[0], yedges[-1]], origin='lower', interpolation='bilinear', vmin=minval, vmax=maxval, cmap=plt.cm.RdBu) + ax[idx1][idx2].grid() + ax[idx1][idx2].set_xlabel(r"r (nm)") + ax[idx1][idx2].set_ylabel(r"h (nm)") + # Add color bar + cax = fig.add_axes([start_colorbar, 0.15, width_colorbar, 0.7]) + cbar = fig.colorbar(im, cax=cax) + cbar.set_label('CDF') + fig.subplots_adjust(hspace=0.35, right=start_colorbar) + plt.savefig(names_prefix + f"CDFs_{agg_size}_{rev_type_map[agg_type]}.pdf") + plt.show() + plt.close('all') + + # radial + # Normalize the RDF by dividing by the shell volume and number of snapshots and plot + if agg_type == type_map["sphere"]: + shell_volumes_rdf = (4.0 / 3.0) * np.pi * (bin_edges[1:] ** 3 - bin_edges[:-1] ** 3) + else: + shell_volumes_rdf = np.pi * (bin_edges[1:] ** 2 - bin_edges[:-1] ** 2) * 2.0 * lmax - plt.plot(mid_bin / 10.0, rdfs[i], linewidth=3, label=name_selections[i]) + for i, sel in enumerate(selections): + rdfs[agg_type][i] = rdfs[agg_type][i] / (shell_volumes_rdf * count_agg_types[agg_type]) - np.savetxt(names_prefix + f"gperp_{agg_size}_" + sel.replace(" ", "_") + ".txt", rdfs[i]) + plt.plot(mid_bin / 10.0, rdfs[agg_type][i], linewidth=3, label=name_selections[i]) - plt.legend(frameon=False) - plt.xlabel(r"r (nm)") - plt.ylabel(r"g$_\perp$(r)") - plt.savefig(names_prefix + f"radial_RDFs_{agg_size}.pdf", bbox_inches="tight") - plt.show() + np.savetxt(names_prefix + f"gperp_{agg_size}_" + sel.replace(" ", "_") + rev_type_map[agg_type] + ".txt", rdfs[agg_type][i]) + + plt.legend(frameon=False) + plt.xlabel(r"r (nm)") + if agg_type == type_map["sphere"]: + plt.ylabel(r"g(r)") + else: + plt.ylabel(r"g$_\perp$(r)") + plt.savefig(names_prefix + f"radial_RDFs_{agg_size}_{rev_type_map[agg_type]}.pdf", bbox_inches="tight") + plt.show() # check if rdfs are skipped elif not skip_rdfs: @@ -482,7 +585,7 @@ def compute_rdfs( ) f.write("a = {} A, b = {} A, c = {} A\n\n".format(a, b, c)) - f.write("alpha = {}\n\n".format((2 * Ia - Ib - Ic) / (Ia + Ib + Ic))) + f.write("e = {}\n\n".format(1.0 - np.min([Ia, Ib, Ic]) / np.mean([Ia, Ib, Ic]))) f.write("Rg = {}±{} A\n".format(Rgm, np.std(Rg))) if compute_pddf: @@ -631,6 +734,12 @@ def compute_rdfs( default=0.02, help="width of the colorbar (default = 0.02)", ) + parser.add_argument( + "--eccentricity-threshold", + type=float, + default=0.12, + help="eccentricity threshold to classify the aggregate as a sphere (default = 0.12)", + ) args = parser.parse_args() @@ -647,7 +756,7 @@ def compute_rdfs( topol = process_topology(args.topol) # write options to file - with open("summary_compute_rdf_aggregates.txt", "w") as f: + with open(f"summary_compute_rdf_aggregates_{args.agg_size}.txt", "w") as f: f.write("Command: " + " ".join(sys.argv) + "\n") compute_rdfs( @@ -672,6 +781,7 @@ def compute_rdfs( args.names_prefix, args.start_colorbar, args.width_colorbar, + args.eccentricity_threshold, tuple(args.fig_size), tuple(args.fig_size_cdf), ) diff --git a/utils/order_parameter_agg.py b/utils/order_parameter_agg.py new file mode 100644 index 00000000..f0dc55e3 --- /dev/null +++ b/utils/order_parameter_agg.py @@ -0,0 +1,328 @@ +import MDAnalysis as mda +from MDAnalysis.lib import distances, mdamath, transformations +from glob import glob +import argparse +from tqdm import tqdm +import os +import numpy as np +import tomli + +def process_topology(topol_path): + with open(topol_path, "rb") as f: + topol = tomli.load(f) + + if "include" in topol["system"]: + for file in topol["system"]["include"]: + path = os.path.join(os.path.dirname(topol_path), file) + with open(path, "rb") as f: + itps = tomli.load(f) + for mol, itp in itps.items(): + topol[mol] = itp + + return topol + +def parse_masses(fname): + if not fname: + return None + + masses = {} + with open(fname, "r") as f: + for line in f: + masses[line.split()[0]] = [float(x) for x in line.split()[1:]] + return masses + + +def get_index_beads(topol, bead1, bead2): + for i, atom in enumerate(topol["atoms"]): + if atom[1] == bead1: + bead1_index = i + elif atom[1] == bead2: + bead2_index = i + return bead1_index, bead2_index + + +def compute_order_parameter(topol, pdbdir, agg_size, bead1, bead2, masses, cylinder_length, center, eccentricity_threshold=0.14): + type_map = {"sphere": 0, "oblate": 1, "prolate": 2} + agg_types = [] + all_order_parameters = [] + all_cosines = [] + all_order_parameters_cylinder = [] + all_cosines_cylinder = [] + all_order_parameters_cap = [] + all_cosines_cap = [] + all_distances = [] + distances_sphere = [] + distances_prolate = [] + all_angles = [] + angles_sphere = [] + angles_prolate = [] + angles_cap = [] + + nsnaps = 0 + for snapshot in tqdm(glob(os.path.join(pdbdir, "*.pdb"))): + u = mda.Universe(snapshot) + + # based on topology and residues determine size of aggregates + resids = {} + for residue in u.residues: + nummols = int( + residue.atoms.positions.shape[0] / topol[residue.resname]["atomnum"] + ) + if residue.resid in resids: + resids[residue.resid] += nummols + else: + resids[residue.resid] = nummols + + # check if we can find an aggregate of the given size + agg_resids = [] + for k, v in resids.items(): + if v == agg_size: + agg_resids.append(k) + + # if size was not found, go to next pdb + if len(agg_resids) == 0: + continue + + # for each aggregate of given size, get the order parameter + for ididx, resid in enumerate(agg_resids): + if ididx > 0: + u = mda.Universe(snapshot) + nsnaps += 1 + box_vectors = u.dimensions + agg_sel = u.select_atoms(f"resid {resid}") + + if not center: + center_coord = u.select_atoms(f"resid {resid}").positions + else: + center_coord = u.select_atoms(f"resid {resid} and " + center).positions + + # get the geometric center using the algorithm: + # https://en.wikipedia.org/wiki/Center_of_mass#Systems_with_periodic_boundary_conditions + tetha = (center_coord / box_vectors[:3]) * (2.0 * np.pi) + xi = np.cos(tetha) + zeta = np.sin(tetha) + + xi_bar = np.mean(xi, axis=0) + zeta_bar = np.mean(zeta, axis=0) + tetha_bar = np.arctan2(-zeta_bar, -xi_bar) + np.pi + + cog = (box_vectors[:3] * tetha_bar) / (2.0 * np.pi) + + box_center = box_vectors[:3] / 2.0 + u.atoms.translate(box_center - cog) + u.atoms.wrap(compound="atoms") + cog = box_center + + # set the masses + i = 0 + mass_array = [] + while i < len(agg_sel.resnames): + name = agg_sel.resnames[i] + for bmass in masses[name]: + mass_array.append(bmass) + i += len(masses[name]) + + mass_array = np.array(mass_array) + agg_sel.masses = mass_array + total_mass = np.sum(mass_array) + + # get moment of inertia and principal axis + I = agg_sel.moment_of_inertia() + UT = agg_sel.principal_axes() # this is already transposed + + # diagonalizes I + Lambda = UT.dot(I.dot(UT.T)) + + I1 = Lambda[0][0] + I2 = Lambda[1][1] + I3 = Lambda[2][2] + + # detect the aggregate type + type_agg = None + # compute eccentricity + e = 1.0 - np.min([I1, I2, I3]) / np.mean([I1, I2, I3]) + if e < eccentricity_threshold: + type_agg = type_map["sphere"] + else: + # I3 is the lowest value (biggest axis) + if np.abs(I1 - I2) > np.abs(I2 - I3): + continue # if oblate, skip + # type_agg = type_map["oblate"] + else: + type_agg = type_map["prolate"] + agg_types.append(type_agg) + + # gets symmetry axis + if type_agg == type_map["oblate"]: + pa = UT[0] + else: + pa = UT[2] + + # get the angle between the principal axis and the z-axis and rotate universe + # this does not take into consideration prolate or oblate micelles + angle = np.degrees(mdamath.angle(pa, [0,0,1])) + ax = transformations.rotaxis(pa, [0,0,1]) + u.atoms.rotateby(angle, ax, point=cog) + + # iterate over all molecules computing the order parameter + molecule_order_parameters = [] + simple_cosine = [] + cylinder_order_parameters = [] + cylinder_cosine = [] + cap_order_parameters = [] + cap_cosine = [] + for mol in agg_sel.residues: + nmols_residue = int(mol.atoms.positions.shape[0] / topol[mol.resname]["atomnum"]) + for i in range(nmols_residue): + mol_at_positions = mol.atoms.positions[i * topol[mol.resname]["atomnum"] : (i + 1) * topol[mol.resname]["atomnum"]] - cog + # get positions of bead only + bead1_index, bead2_index = get_index_beads(topol[mol.resname], bead1, bead2) + orient_vec = mol_at_positions[bead2_index] - mol_at_positions[bead1_index] + all_distances.append(np.linalg.norm(orient_vec)) + + if type_agg == type_map["sphere"]: + distances_sphere.append(np.linalg.norm(orient_vec)) + cm_vec = mol_at_positions[bead1_index] + + # compute order parameter + cosine = np.dot(cm_vec, orient_vec) / (np.linalg.norm(cm_vec) * np.linalg.norm(orient_vec)) + molecule_order_parameters.append(0.5 * (3.0 * cosine * cosine - 1.0)) + simple_cosine.append(cosine) + angles_sphere.append(np.arccos(cosine) * 180.0 / np.pi) + + elif type_agg == type_map["prolate"]: + distances_prolate.append(np.linalg.norm(orient_vec)) + # find if the molecule is in the cylindrical or cap regions + if mol_at_positions[bead1_index][2] > cylinder_length: # top cap + cm_vec = mol_at_positions[bead1_index] + cm_vec[2] -= cylinder_length + + cosine = np.dot(cm_vec, orient_vec) / (np.linalg.norm(cm_vec) * np.linalg.norm(orient_vec)) + cap_order_parameters.append(0.5 * (3.0 * cosine * cosine - 1.0)) + cap_cosine.append(cosine) + angles_cap.append(np.arccos(cosine) * 180.0 / np.pi) + elif mol_at_positions[bead1_index][2] < -cylinder_length: # bottom cap + cm_vec = mol_at_positions[bead1_index] + cm_vec[2] += cylinder_length + + cosine = np.dot(cm_vec, orient_vec) / (np.linalg.norm(cm_vec) * np.linalg.norm(orient_vec)) + cap_order_parameters.append(0.5 * (3.0 * cosine * cosine - 1.0)) + cap_cosine.append(cosine) + angles_cap.append(np.arccos(cosine) * 180.0 / np.pi) + else: # cylinder part + cm_vec = mol_at_positions[bead1_index] + cm_vec[2] = 0.0 + + cosine = np.dot(cm_vec, orient_vec) / (np.linalg.norm(cm_vec) * np.linalg.norm(orient_vec)) + cylinder_order_parameters.append(0.5 * (3.0 * cosine * cosine - 1.0)) + cylinder_cosine.append(cosine) + angles_prolate.append(np.arccos(cosine) * 180.0 / np.pi) + + + all_angles.append(np.arccos(cosine) * 180.0 / np.pi) + + if type_agg == type_map["sphere"]: + all_order_parameters.append(np.mean(molecule_order_parameters)) + all_cosines.append(np.mean(simple_cosine)) + elif type_agg == type_map["prolate"]: + all_order_parameters_cylinder.append(np.mean(cylinder_order_parameters)) + all_cosines_cylinder.append(np.mean(cylinder_cosine)) + all_order_parameters_cap.append(np.mean(cap_order_parameters)) + all_cosines_cap.append(np.mean(cap_cosine)) + + print(f"Aggregate size: {agg_size}") + print(f"Eccentricity threshold: {eccentricity_threshold}") + print(f"Prolate cylinder half length: {cylinder_length}\n") + + print(f"Percentage of spheres: {agg_types.count(type_map['sphere']) / nsnaps}") + print(f"Percentage of oblates: {agg_types.count(type_map['oblate']) / nsnaps}") + print(f"Percentage of prolates: {agg_types.count(type_map['prolate']) / nsnaps}\n") + + print(f"Mean distance between beads: {np.mean(all_distances)} ± {np.std(all_distances)}") + print(f"Mean distance between beads (sphere): {np.mean(distances_sphere)} ± {np.std(distances_sphere)}") + print(f"Mean distance between beads (prolate): {np.mean(distances_prolate)} ± {np.std(distances_prolate)}\n") + + print(f"Mean order parameter (sphere): {np.mean(all_order_parameters)} ± {np.std(all_order_parameters)}") + print(f"Mean cosine (sphere): {np.mean(all_cosines)} ± {np.std(all_cosines)}") + print(f"Mean order parameter (cylinder): {np.mean(all_order_parameters_cylinder)} ± {np.std(all_order_parameters_cylinder)}") + print(f"Mean cosine (cylinder): {np.mean(all_cosines_cylinder)} ± {np.std(all_cosines_cylinder)}") + print(f"Mean order parameter (cap): {np.mean(all_order_parameters_cap)} ± {np.std(all_order_parameters_cap)}") + print(f"Mean cosine (cap): {np.mean(all_cosines_cap)} ± {np.std(all_cosines_cap)}\n") + + print(f"Mean angle (all): {np.mean(all_angles)} ± {np.std(all_angles)}") + + import matplotlib.pyplot as plt + plt.rcParams["figure.figsize"] = (10, 8) + plt.rcParams["font.size"] = 22 + + # Plotting distributions + angles = [all_angles, angles_cap, angles_prolate, angles_sphere] + labels = ['All Angles', 'Cap Angles', 'Prolate Angles', 'Sphere Angles'] + colors = ['C0', 'C1', 'C2', 'C3'] + counts = [len(angle_list) for angle_list in angles] + sorted_angles = [angle_list for _, angle_list in sorted(zip(counts, angles), reverse=True)] + sorted_labels = [label for _, label in sorted(zip(counts, labels), reverse=True)] + sorted_colors = [color for _, color in sorted(zip(counts, colors), reverse=True)] + + for angle_list, label, color in zip(sorted_angles, sorted_labels, sorted_colors): + plt.hist(angle_list, bins=50, alpha=0.5, label=label, color=color) + + # Adding labels and legend + plt.xlabel(r'Angle ($\degree$)') + plt.ylabel('Frequency') + plt.legend() + + # Displaying the plot + plt.savefig(f"distr_angles_surface_{agg_size}.pdf", bbox_inches='tight') + plt.show() + + +if __name__ == "__main__": + description = "Given the directory with all the .pdb generated by aggregates.py compute the order parameters for each aggregate type" + parser = argparse.ArgumentParser(description=description) + parser.add_argument("topol", type=str, help=".toml topology files") + parser.add_argument("pdb_dir", type=str, help="directory containing the .pdb") + parser.add_argument( + "agg_size", type=int, help="aggregate size for which the RDFs will be computed" + ) + parser.add_argument( + "bead1", + type=str, + help='name of first bead in the order parameter calculation. Example: "TN2a"', + ) + parser.add_argument( + "bead2", + type=str, + help='name of second bead in the order parameter calculation. Example: "TP1"', + ) + parser.add_argument( + "masses", + type=str, + default=None, + help="file containing residue name (molecule name) in first column and a column with the mass for each bead", + ) + parser.add_argument( + "--cylinder-length", + type=float, + default=0.0, + help="HALF length of the cylinder (not considering the cap) to compute the order parameter", + ) + parser.add_argument( + "--eccentricity-threshold", + type=float, + default=0.14, + help="eccentricity threshold to determine the type of aggregate", + ) + parser.add_argument( + "--center", + type=str, + default=False, + help="selection for the center (if not used the center is the centroid)", + ) + + args = parser.parse_args() + + topol = process_topology(args.topol) + + compute_order_parameter(topol, args.pdb_dir, args.agg_size, args.bead1, args.bead2, parse_masses(args.masses), args.cylinder_length, args.center, args.eccentricity_threshold) +