Skip to content

Commit

Permalink
merge with master
Browse files Browse the repository at this point in the history
  • Loading branch information
mimakaev committed Nov 4, 2019
2 parents f1a8a45 + 6da05ab commit 3731984
Show file tree
Hide file tree
Showing 22 changed files with 720 additions and 155 deletions.
6 changes: 3 additions & 3 deletions examples/example/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import os,sys
import polychrom
from polychrom import (simulation, starting_conformations,
forces, extra_forces, forcekits)
forces, forcekits)
import simtk.openmm as openmm
import os
from polychrom.hdf5_format import HDF5Reporter
Expand All @@ -27,7 +27,7 @@ def exampleOpenmm():
integrator="variableLangevin",
error_tol=0.002,
GPU = "0",
collision_rate=0.02,
collision_rate=0.1,
N = 10000,
reporters=[reporter])

Expand Down Expand Up @@ -91,7 +91,7 @@ def exampleOpenmm():

angle_force_func=forces.angle_force,
angle_force_kwargs={
'k':0.05
'k':1.5,
# K is more or less arbitrary, k=4 corresponds to presistence length of 4,
# k=1.5 is recommended to make polymer realistically flexible; k=8 is very stiff
},
Expand Down
Binary file removed examples/example/trajectory/block0.dat
Binary file not shown.
Binary file removed examples/example/trajectory/block1.dat
Binary file not shown.
Binary file removed examples/example/trajectory/block10.dat
Binary file not shown.
Binary file removed examples/example/trajectory/block2.dat
Binary file not shown.
Binary file removed examples/example/trajectory/block3.dat
Binary file not shown.
Binary file removed examples/example/trajectory/block4.dat
Binary file not shown.
Binary file removed examples/example/trajectory/block5.dat
Binary file not shown.
Binary file removed examples/example/trajectory/block6.dat
Binary file not shown.
Binary file removed examples/example/trajectory/block7.dat
Binary file not shown.
Binary file removed examples/example/trajectory/block8.dat
Binary file not shown.
Binary file removed examples/example/trajectory/block9.dat
Binary file not shown.
255 changes: 185 additions & 70 deletions examples/storage_formats/hdf5_reporter.ipynb

Large diffs are not rendered by default.

115 changes: 114 additions & 1 deletion polychrom/__polymer_math.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,16 @@ inline double dotProduct(point a, point b){
// }

inline double dist(point a,point b){
return sqr(a.x-b.x)+sqr(a.y-b.y)+sqr(a.z-b.z);
return sqrt( (a.x-b.x)*(a.x-b.x)
+(a.y-b.y)*(a.y-b.y)
+(a.z-b.z)*(a.z-b.z));
}

// inline double dist(int i,int j){
// return sqrt(dotProduct((position[i]-position[j]),(position[i]-position[j])));
// }


int intersect(point t1,point t2,point t3,point r1,point r2) {
point A,B,C,D,n;

Expand Down Expand Up @@ -309,3 +316,109 @@ void _mutualSimplifyCpp (
}


int _simplifyCpp (double *datax, double *datay, double *dataz, int N)
{
int M = 0;
int k1;
int sum = 0;
int t=0,s=0,k=0;
int turn=0;
bool breakflag;
float maxdist;
vector <point> position;
vector <point> newposition;
vector <int> todelete;
int i;

position=vector<point>(N);
newposition=vector<point>(N);

for (i=0;i<N;i++)
{
position[i].x = datax[i] + 0.000000000001*(rand()%1000);
position[i].y = datay[i] + 0.00000000000001*(rand()%1000);
position[i].z = dataz[i] + 0.0000000000001*(rand()%1000);
}

todelete = vector <int> (N);
for (i=0;i<N;i++) todelete[i] == -2;
for (int xxx = 0; xxx < 1000; xxx++)
{
maxdist = 0;
for (i=0;i<N-1;i++)
{
if (dist(position[i],position[i+1]) > maxdist) {maxdist = dist(position[i],position[i+1]);}
}
turn++;
M=0;
for (i=0;i<N;i++) todelete[i] = -2;
for (int j=1;j<N-1;j++) //going over all elements trying to delete
{
breakflag = false; //by default we delete thing

for (k=0;k<N;k++) //going over all triangles to check
{
long double dd = dist(position[j],position[k]);
if (dd < 2 * maxdist)
{

if (k < j-2 || k > j+1)
{
if (k < N-1) k1 = k+1;
else k1 = 0;
sum = intersect(
position[j-1],position[j],position[j+1],
position[k],position[k1]);
if (sum!=0)
{
//printf("intersection at %d,%d\n",j,k);
breakflag = true; //keeping thing
break;
}
}
}
else
{
k+= max(((int)((float)dd/(float)maxdist )- 3), 0);
}
}
if (breakflag ==false)
{
todelete[M++] = j;
position[j] = (position[j-1] + position[j+1])* 0.5;
//printf("%d will be deleted at %d\n",j,k);
j++;
//break;
}
}
t = 0;//pointer for todelete
s = 0;//pointer for newposition
if (M==0)
{
break;
}
for (int j=0;j<N;j++)
{
if (todelete[t] == j)
{
t++;
continue;
}
else
{
newposition[s++] = position[j];
}
}
M = 0;
t = 0;
position = newposition;
}

for (i=0;i<s;i++)
{
datax[i] = position[i].x;
datay[i] = position[i].y;
dataz[i] = position[i].z;
}
return s;
}
2 changes: 2 additions & 0 deletions polychrom/__polymer_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,7 @@ void _mutualSimplifyCpp (
double *datax2, double *datay2, double *dataz2, int N2,
long *ret
);

int _simplifyCpp (double *datax, double *datay, double *dataz, int N);

#endif
23 changes: 23 additions & 0 deletions polychrom/_polymer_math.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ cdef extern from "__polymer_math.h":
double *datax2, double *datay2, double *dataz2, int N2,
long *ret
)
int _simplifyCpp (double *datax, double *datay, double *dataz, int N)




Expand Down Expand Up @@ -72,3 +74,24 @@ def mutualSimplify(data1, data2):
data2 = np.array([datax2, datay2, dataz2]).T

return data1[:ret[0]], data2[:ret[1]]


def simplifyPolymer(data):
"""a weave.inline wrapper for polymer simplification code
Calculates a simplified topologically equivalent polymer ring"""

if len(data) != 3:
data = np.transpose(data)
if len(data) != 3:
raise ValueError("Wrong dimensions of data")
cdef double[:] datax = np.array(data[0], float, order="C")
cdef double[:] datay = np.array(data[1], float, order="C")
cdef double[:] dataz = np.array(data[2], float, order="C")

cdef int N = len(datax)

new_N = _simplifyCpp(&datax[0], &datay[0], &dataz[0], N)

data = np.array([datax, datay, dataz]).T

return data[:new_N]
44 changes: 18 additions & 26 deletions polychrom/contactmaps.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,30 +9,27 @@
from . import polymer_analyses

"""
This file defines the class that allows averaging contacts from multile processes into one shared contactmap.
This module is the main workhorse of tools to calculate contactmaps, both from polymer simulations and from other simulations (e.g. 1D simulations of loop extrusion). All of the functions here are designed to be parallelized, and lots of efforts were put into making this possible.
It was not an easy function to write, and as a result it is a little messy. But it is necessary for a set of reasons.
-- Calculating contact maps is slow, and benefits greatly from parallelizing
The reasons we need parallel contactmap code is the following:
-- Calculating contact maps is slow, esp. at large contact radii, and benefits greatly from parallelizing
-- Doing regular multiprocesing.map has limitations
-- It can only handle heataps up to some size, and transferring gigabyte-sized heatmaps between processes takes minutes
-- It can only do as many heatmaps as fits in RAM, which on 20-core 128GB machine is no more than 5GB/heatmap
-- Historically, we had troubles with heatmaps over 6000x6000 monomers being transferred between processes using pickle
So to combat that we implemented this module. It can create one big heatmap, and keep in in shared memory.
Different processes can write to that.
The structure of this class is as follows.
On the outer lefe, it provides three methods to average contactmaps: monomerResolutionContactMap, binnedContactMap,
On the outer level, it provides three methods to average contactmaps: monomerResolutionContactMap, binnedContactMap,
and monomerResolutionContactMapSubchains
The first two create contact map from an entire file. The last one creates contact maps from sub-chains in a file, starting at a
given set of starts. It is useful when doing contact maps from several copies of a system in one simulation.
The first two create contact map from an entire file: either monomer-resolution or binned. The last one creates contact maps from sub-chains in a file, starting at a given set of starting points. It is useful when doing contact maps from several copies of a system in one simulation.
The first two methods have a legacy implementation from the old library that is still here to do the tests.
It also provides a more generic method "averageContacts". You can use it to average contacts obtained from
polymer simulations, or from any other simulation.
On the middle level, it provides a method "averageContacts". This method accepts a "contact iterator", and can be used to average contacts from both a set of filenames and from a simulation of some kind (e.g. averaging positions of loop extruding factors from a 1D loop extrusion simulation). All of the outer level functions (monomerResolutionContactMap for example) are implemented using this method.
On the lower level, there are internals of the "averageContacts" method and an associated "worker" function. There is generally no need to understand the code of those functions. There exists a reference implementation of both the worker and the averageContacts function, named "simpleWorker" and "averageContactsSimple". They do all the things that "averageContacts" do, but on only one core. In fact, "averageContacts" defaults to "averageContactsSimple" if requested to run on one core because it is a little bit faster.
It also will provide an example of how to average contacts from a non-polymer simulation, and run this simulation on many cores.
"""
def indexing(smaller, larger, M):
"""converts x-y indexes to index in the upper triangular matrix"""
Expand Down Expand Up @@ -321,8 +318,6 @@ def __init__(self, filenames, cutoff = 1.7, loadFunction=None, exceptionsToIgnor
When initialized, the iterator should store these args properly and create all necessary constructs
"""
from openmmlib import contactmaps
self.contactmaps = contactmaps
self.filenames = filenames
self.cutoff = cutoff
self.exceptionsToIgnore = exceptionsToIgnore
Expand Down Expand Up @@ -350,8 +345,8 @@ def next(self):
return contacts

def monomerResolutionContactMap(filenames,
cutoff=1.7,
n=4, # Num threads
cutoff=5,
n=8, # Num threads
contactFinder = polymer_analyses.calculate_contacts,
loadFunction=polymerutils.load,
exceptionsToIgnore=[], useFmap=False):
Expand All @@ -362,8 +357,8 @@ def monomerResolutionContactMap(filenames,
return averageContacts(filenameContactMap,values,N, classInitArgs=args, useFmap=useFmap, uniqueContacts = True, nproc=n)


def binnedContactMap(filenames, chains=None, binSize=5, cutoff=1.7,
n=4, # Num threads
def binnedContactMap(filenames, chains=None, binSize=5, cutoff=5,
n=8, # Num threads
contactFinder = polymer_analyses.calculate_contacts,
loadFunction=polymerutils.load,
exceptionsToIgnore=None, useFmap=False):
Expand Down Expand Up @@ -421,8 +416,6 @@ def __init__(self, filenames, mapStarts, mapN, cutoff = 1.7, loadFunction=None,
When initialized, the iterator should store these args properly and create all necessary constructs
"""
from openmmlib import contactmaps
self.contactmaps = contactmaps
self.filenames = filenames
self.cutoff = cutoff
self.exceptionsToIgnore = exceptionsToIgnore
Expand Down Expand Up @@ -463,8 +456,8 @@ def next(self):
def monomerResolutionContactMapSubchains(filenames,
mapStarts,
mapN,
cutoff=1.7,
n=4, # Num threads
cutoff=5,
n=8, # Num threads
method = polymer_analyses.calculate_contacts,
loadFunction=polymerutils.load,
exceptionsToIgnore=[], useFmap=False):
Expand Down Expand Up @@ -492,7 +485,6 @@ def next(self):

def _test():
ars = [np.random.random((60,3)) * 4 for _ in range(200)]
import openmmlib.contactmaps
conts = polymer_analyses.calculate_contacts(ars[0],1)
print(conts.shape)
args = np.repeat(np.arange(20, dtype=int), 10)
Expand All @@ -512,8 +504,8 @@ def _test():
assert np.allclose(cmap1, cmap4)
assert np.allclose(cmap1, cmap3)

from .legacy_contactmaps import averagePureContactMap as cmapPureMap
from .legacy_contactmaps import averageBinnedContactMap as cmapBinnedMap
from .legacy.contactmaps import averagePureContactMap as cmapPureMap
from .legacy.contactmaps import averageBinnedContactMap as cmapBinnedMap

for n in [1, 5, 20]:
cmap6 = monomerResolutionContactMap(range(200), cutoff = 1, loadFunction=lambda x:ars[x], n=n)
Expand Down
12 changes: 12 additions & 0 deletions polychrom/forcekits.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,18 @@ def polymer_chains(
If True then do not calculate non-bonded forces between the
particles connected by a bond. True by default.
"""



force_list = []

bonds = []
triplets = []
newchains = []

for start, end, is_ring in chains:
end = sim_object.N if (end is None) else end
newchains.append((start, end, is_ring))

bonds += [(j, j+1) for j in range(start, end - 1)]
triplets += [(j - 1, j, j + 1) for j in range(start + 1, end - 1)]
Expand All @@ -48,6 +53,13 @@ def polymer_chains(
bonds.append((start, end-1))
triplets.append((int(end - 2), int(end - 1), int(start)))
triplets.append((int(end - 1), int(start), int(start + 1)))

reportDict = {"chains":np.array(newchains, dtype=int),
"bonds": np.array(bonds, dtype=int),
"angles": np.array(triplets)
}
for reporter in sim_object.reporters:
reporter.report("forcekit_polymer_chains", reportDict)

force_list.append(
bond_force_func(sim_object, bonds, **bond_force_kwargs)
Expand Down
Loading

0 comments on commit 3731984

Please sign in to comment.