From ea95b14c14b760317bdb3d42dc7d39bda36168b7 Mon Sep 17 00:00:00 2001
From: Philipp Pracht <pp555@cam.ac.uk>
Date: Sat, 10 Feb 2024 20:01:10 +0000
Subject: [PATCH] Internal update of CREGEN interface

Signed-off-by: Philipp Pracht <pp555@cam.ac.uk>
---
 src/algos/ConfSolv.F90                | 246 +++++++---
 src/algos/playground.f90              |   5 +-
 src/algos/refine.f90                  |  11 +-
 src/algos/search_conformers.f90       |   1 +
 src/algos/search_entropy.f90          |   1 +
 src/algos/search_mecp.f90             |   1 +
 src/algos/search_newnci.f90           |   1 +
 src/classes.f90                       |   2 +
 src/cregen.f90                        | 114 +++--
 src/crest_main.f90                    |   3 +
 src/ensemblecomp.f90                  | 619 +++++++++++++-------------
 src/filemod.f90                       |   2 +-
 src/legacy_algos/confscript2_main.f90 |   1 +
 src/legacy_algos/confscript2_misc.f90 |   3 +
 src/legacy_algos/confscript3.f90      |   1 +
 src/legacy_algos/relaxensemble.f90    |   1 +
 src/parsing/parse_csv.f90             |  99 ++++
 src/parsing/parse_maindata.f90        |   5 +
 src/propcalc.f90                      |   1 +
 src/qcg/solvtool.f90                  |   1 +
 20 files changed, 695 insertions(+), 423 deletions(-)

diff --git a/src/algos/ConfSolv.F90 b/src/algos/ConfSolv.F90
index 48f7b07e..29ae231e 100644
--- a/src/algos/ConfSolv.F90
+++ b/src/algos/ConfSolv.F90
@@ -40,14 +40,18 @@ module ConfSolv_module
   !> ConfSolv teardown instruction
   logical :: cs_teardown = .false.
   !> Keeping track of setup. Has it been called already?
-  logical :: cs_setup = .false. 
+  logical :: cs_setup = .false.
 
   !> ConfSolv parameter location
   character(len=:),allocatable :: cs_param
   !> ConfSolv solvent & smiles
   character(len=:),allocatable :: cs_solvent
+  character(len=:),allocatable :: cs_solvfile
   character(len=:),allocatable :: cs_smiles
 
+  !> n_threshold_mols
+  integer :: cs_n_threshold_mols = 1
+
 !========================================================================================!
 !========================================================================================!
 contains  !> MODULE PROCEDURES START HERE
@@ -96,9 +100,36 @@ subroutine cs_shutdown(io)
       call kill(cs_pid,9,io)
       deallocate (cs_pid)
     end if
+    call cs_shutdown2(io)
 
   end subroutine cs_shutdown
 
+!=================================================!
+  subroutine cs_shutdown2(io)
+    use iomod
+    implicit none     
+    integer,intent(out) :: io
+    integer :: ich,ro,pids
+    character(len=100) :: str
+    character(len=50) :: str2
+    call command('lsof confsolv.out > tmpcs 2>/dev/null',io)
+
+    open(newunit=ich,file='tmpcs')
+    do
+      read(ich,'(a)',iostat=ro) str
+      if(ro < 0 ) exit
+      !write(*,*) trim(str)
+      read(str,*,iostat=ro) str2,pids
+      if(ro == 0)then
+      !   write(*,*) pids
+         call kill(pids,9,io) 
+      endif
+    enddo
+    close(ich)
+
+    call remove('tmpcs') 
+  end subroutine cs_shutdown2
+
 !=================================================!
 
   subroutine cs_deploy()
@@ -108,31 +139,31 @@ subroutine cs_deploy()
     character(len=50) :: atmp
     logical :: ex
 
-    if(.not.allocated(cs_bin)) cs_bin = 'confsolvserver'
-    call remove('confsolv.out')
+    if (.not.allocated(cs_bin)) cs_bin = 'confsolvserver'
+    !call remove('confsolv.out')
     call remove('config_template.toml')
 
     job = 'nohup '//trim(cs_bin)//' -l '//'> confsolv.out 2>/dev/null'//' &'
 
-    write(stdout,'(2x,a,a)') 'Hosting command: ',trim(job)
+    write (stdout,'(2x,a,a)') 'Hosting command: ',trim(job)
     call command(job,io)
 
-    if(io /= 0) error stop '**ERROR** failed to host ConfSolv server'
+    if (io /= 0) error stop '**ERROR** failed to host ConfSolv server'
     !call sleep(3)
-    do 
+    do
       call sleep(1)
-      inquire(file='config_template.toml',exist=ex)
-      if(ex) exit
-    enddo
+      inquire (file='config_template.toml',exist=ex)
+      if (ex) exit
+    end do
 
     !> read port and pid
-    open(newunit=ich,file='confsolv.out')
-    read(ich,*) atmp,cs_pid
-    read(ich,*) atmp,cs_port
-    close(ich) 
+    open (newunit=ich,file='confsolv.out')
+    read (ich,*) atmp,cs_pid
+    read (ich,*) atmp,cs_port
+    close (ich)
     cs_teardown = .true.
     cs_setup = .false.
-    write(stdout,'(2x,2(a,i0))') 'ConfSolv server will be running at http://localhost:',cs_port,' with PID ',cs_pid
+    write (stdout,'(2x,2(a,i0))') 'ConfSolv server will be running at http://localhost:',cs_port,' with PID ',cs_pid
   end subroutine cs_deploy
 
 !=================================================!
@@ -152,11 +183,15 @@ subroutine cs_write_config(ensname,threads)
     if (allocated(cs_param)) then
       call wrtoml(ich,'model_path',cs_param)
     end if
-    
+
     call wrtoml(ich,'xyz_file',trim(atmp)//'/'//trim(ensname))
-    call wrtoml(ich,'solvent_file',trim(atmp)//'/'//'crest_solvents.csv')
+    if (allocated(cs_solvfile)) then
+      call wrtoml(ich,'solvent_file',trim(atmp)//'/'//cs_solvfile)
+    else
+      call wrtoml(ich,'solvent_file',trim(atmp)//'/'//'crest_solvents.csv')
+    end if
 
-    call wrtoml_int(ich,'n_threshold_mols',1)
+    call wrtoml_int(ich,'n_threshold_mols',cs_n_threshold_mols)
     close (ich)
   contains
     subroutine wrtoml(ch,key,val)
@@ -314,13 +349,69 @@ subroutine cs_write_solvent_csv(solvent,smiles,ch)
       case ('urea')
         write (ich,'(a,",",a)') solvent,'NC(=O)N'
       case default
-        write(stderr,'(2a)') '**ERROR** failed to find matching solvent SMILES for: ',solvent
+        write (stderr,'(2a)') '**ERROR** failed to find matching solvent SMILES for: ',solvent
         error stop
       end select
     end if
     close (ich)
   end subroutine cs_write_solvent_csv
 
+!========================================================================================!
+
+  subroutine confsolv_select_gsoln(nall,ncol,data,gsoln,mapping)
+!************************************************
+!* From the matrix of ΔΔGsoln, select the best
+!* for each conformer and document which solvent
+!* that corresponds to
+!************************************************
+    implicit none
+    integer,intent(in)  :: nall,ncol
+    real(wp),intent(in)  :: data(ncol,nall)
+    real(wp),intent(out) :: gsoln(nall)
+    integer,intent(out)  :: mapping(nall)
+    integer :: i,j,k,l,mink
+    real(wp) :: dum
+    mapping(:) = 0
+    gsoln(:) = huge(dum)
+    if (ncol < 3) then
+!>--- ConfSolv should put out at least 3 csv columns. The first two are just IDs
+      write (stderr,'(a)') '**ERROR** dimension mismatch in ConfSolv data processing'
+      error stop
+    end if
+    do i = 1,nall
+      do j = 3,ncol
+        k = j-2
+        if (data(j,i) < gsoln(i)) then
+          mink = k
+          gsoln(i) = data(j,i)
+        end if
+      end do
+      mapping(i) = mink
+    end do
+  end subroutine confsolv_select_gsoln
+
+
+  subroutine confsolv_dump_gsoln(nall,ncol,gsoln,mapping,headers)
+!****************************************************
+!* Dump the selected ΔΔGsoln, and the corresponding
+!* solvent for each conformer
+!****************************************************
+    implicit none
+    integer,intent(in)  :: nall,ncol
+    real(wp),intent(in) :: gsoln(nall)
+    integer,intent(in)  :: mapping(nall)
+    character(len=*),intent(in) :: headers(ncol)
+    integer :: i,j,k,l,mink,ich
+    real(wp) :: dum
+    open(newunit=ich,file='confsolv.dat')
+    do i = 1,nall
+      k = mapping(i)+2
+      write(ich,'(f15.8,1x,a)') gsoln(i),trim(headers(k))
+    end do
+    close(ich)
+  end subroutine confsolv_dump_gsoln
+
+
 !========================================================================================!
 !========================================================================================!
 end module ConfSolv_module
@@ -338,7 +429,7 @@ subroutine confsolv_request(ensname,nall,ncpus,gsoln,io)
 !*  nall    - number of structures in ensemble
 !*  ncpus   - number on cores to run on
 !*  gsoln   - ΔΔGsoln for each structure
-!*  io      - exit status 
+!*  io      - exit status
 !***********************************************
   use crest_parameters
   use crest_data
@@ -359,6 +450,10 @@ subroutine confsolv_request(ensname,nall,ncpus,gsoln,io)
   logical :: pr,wr
   character(len=:),allocatable :: job
   real(wp),allocatable :: column(:)
+  real(wp),allocatable :: data(:,:)
+  integer,allocatable :: mapping(:)
+  character(len=:),allocatable :: headers(:) 
+  integer :: ncol,nrow
   real(wp) :: avg
 
   io = 0
@@ -366,14 +461,14 @@ subroutine confsolv_request(ensname,nall,ncpus,gsoln,io)
 
 !>--- setup
   if (allocated(cs_pid).and.allocated(cs_port)) then
-  !> user-provided PID and port (no automatic teardown)
+    !> user-provided PID and port (no automatic teardown)
     write (stdout,'(2x,a,i0,a,i0)') 'Looking for ConfSolv server (PID: ',cs_pid,') running at '//&
     & 'http://localhost:',cs_port
-    !cs_teardown = .false. 
+    !cs_teardown = .false.
   else
-  !> fallback: automatic host (not recommended)
-    allocate(cs_pid,cs_port)
-    call cs_deploy() 
+    !> fallback: automatic host (not recommended)
+    allocate (cs_pid,cs_port)
+    call cs_deploy()
   end if
   if (allocated(cs_param)) then
     write (stdout,'(2x,a,/,3x,a)') 'pyTorch checkpoint files located at ',cs_param
@@ -381,81 +476,98 @@ subroutine confsolv_request(ensname,nall,ncpus,gsoln,io)
     write (stderr,*) '**ERROR** cannot run ConfSolv without defining checkpoint file location!'
     error stop
   end if
-  if (allocated(cs_solvent).and.allocated(cs_smiles)) then
-    write (stdout,'(2x,a,a,3a)') 'Requested ΔΔGsoln for ',cs_solvent,' (SMILES: ',trim(cs_smiles),')'
-    call cs_write_solvent_csv(cs_solvent,smiles=cs_smiles)
-  else if(allocated(cs_solvent))then
-    write (stdout,'(2x,a,a,a)') 'Requested ΔΔGsoln for ',cs_solvent,' (trying to find SMILES ...)'
-    call cs_write_solvent_csv(cs_solvent)
+  !> pass the user-defined solvents-csv, or do a single solvent
+  if (allocated(cs_solvfile)) then
+    write (stdout,'(2x,a,a)') 'Requested ΔΔGsoln for solvents in ',cs_solvfile
+  else
+    if (allocated(cs_solvent).and.allocated(cs_smiles)) then
+      write (stdout,'(2x,a,a,3a)') 'Requested ΔΔGsoln for ',cs_solvent,' (SMILES: ',trim(cs_smiles),')'
+      call cs_write_solvent_csv(cs_solvent,smiles=cs_smiles)
+    else if (allocated(cs_solvent)) then
+      write (stdout,'(2x,a,a,a)') 'Requested ΔΔGsoln for ',cs_solvent,' (trying to find SMILES ...)'
+      call cs_write_solvent_csv(cs_solvent)
+    end if
   end if
   write (stdout,'(2x,a,a)') 'Processing ensemble file ',trim(ensname)
 
 !>---- creating the request configuration
   write (stdout,'(2x,a)',advance='no') 'Writing config.toml file              ...'
-  flush(stdout) 
+  flush (stdout)
   call cs_write_config(ensname,ncpus)
-  write(stdout,*) 'done.'
-
+  write (stdout,*) 'done.'
 
   job = ''
   job = trim(job)//' '//cs_bin//' -c config.toml'
 !>----- this should only be called once:
-  if(.not.cs_setup)then
-    write(stdout,'(2x,a)',advance='no') 'Instructing ConfSolv model setup      ...'
-    flush(stdout)
+  if (.not.cs_setup) then
+    write (stdout,'(2x,a)',advance='no') 'Instructing ConfSolv model setup      ...'
+    flush (stdout)
     call command(trim(job)//' -s >> confsolv.out 2>/dev/null',io)
-    if(io/=0)then
-      write(stdout,*)
-      write(stderr,'(a)')"**ERROR** failed request to ConfSolv server"
+    if (io /= 0) then
+      write (stdout,*)
+      write (stderr,'(a)') "**ERROR** failed request to ConfSolv server"
       call cs_shutdown(io)
       error stop
-    endif 
+    end if
     cs_setup = .true.
-    write(stdout,*) 'done.'
-  endif 
+    write (stdout,*) 'done.'
+  end if
 
 !>---- and then the actual evaluation
   call remove('confsolv.csv')
   call remove('confsolv_uncertainty.csv')
-  write(stdout,'(2x,a)',advance='no') 'Evaluation of ConfSolv D-MPNN         ...'
-  flush(stdout) 
+  write (stdout,'(2x,a)',advance='no') 'Evaluation of ConfSolv D-MPNN         ...'
+  flush (stdout)
   call command(trim(job)//' -r >> confsolv.out 2>/dev/null',io)
-  write(stdout,*) 'done.'
-  if(io/=0)then
-    write(stdout,*)
-    write(stderr,'(a)')"**ERROR** failed request to ConfSolv server"
+  write (stdout,*) 'done.'
+  if (io /= 0) then
+    write (stdout,*)
+    write (stderr,'(a)') "**ERROR** failed request to ConfSolv server"
     call cs_shutdown(io)
     error stop
-  endif
+  end if
 
 !>--- read ΔΔGsoln
-  write(stdout,'(2x,a)',advance='no') 'Reading confsolv.csv                  ...'
-  flush(stdout)
-  call parse_csv_column_real('confsolv.csv',cs_solvent,column)
-  write(stdout,*) 'done.'
-  if(size(column,1) == nall)then
-    gsoln(:) = column(:)
+  write (stdout,'(2x,a)',advance='no') 'Reading confsolv.csv                  ...'
+  flush (stdout)
+  call parse_csv_allcolumns('confsolv.csv',data,cols=ncol,rows=nrow)
+  write (stdout,*) 'done.'
+  if (nrow == nall) then
+    allocate(mapping(nall))
+    call confsolv_select_gsoln(nall,ncol,data,gsoln,mapping)
+    call parse_csv_file_row('confsolv.csv',1,headers)
+    call confsolv_dump_gsoln(nall,ncol,gsoln,mapping,headers) 
   else
-    write(stdout,'(a)') '**ERROR** dimension mismatch in confsolv_request'
+    write (stdout,'(a)') '**ERROR** dimension mismatch in confsolv_request'
     call cs_shutdown(io)
     error stop
-  endif
+  end if
 
 !>--- read uncertainty
-  write(stdout,'(2x,a)',advance='no') 'Reading confsolv_uncertainty.csv      ...'
-  flush(stdout)
-  call parse_csv_column_real('confsolv_uncertainty.csv',cs_solvent,column)
-  write(stdout,*) 'done.'
-  if(size(column,1) == nall)then
-    avg = sum(column(:))/real(nall,wp)
-    write(stdout,'(2x,a,f25.15)') 'Average uncertainty of ConfSolv prediction:',avg
+  write (stdout,'(2x,a)',advance='no') 'Reading confsolv_uncertainty.csv      ...'
+  flush (stdout)
+  call parse_csv_allcolumns('confsolv_uncertainty.csv',data)
+  write (stdout,*) 'done.'
+  if (size(data,2) == nall) then
+    avg = 0.0_wp
+    do i=1,nall
+      k=mapping(i) + 2
+      avg=avg+data(k,i)
+    enddo
+    avg = avg / real(nall,wp) 
+    write (stdout,'(2x,a,f25.15)') 'Average uncertainty of ConfSolv prediction:',avg
   else
-    write(stdout,'(a)') '**ERROR** dimension mismatch in confsolv_request'
+    write (stdout,'(a)') '**ERROR** dimension mismatch in confsolv_request'
     call cs_shutdown(io)
     error stop
-  endif
+  end if
 
-  if(allocated(column)) deallocate(column)
+  !call cs_shutdown2(i)
+  
+  if (allocated(headers)) deallocate(headers)
+  if (allocated(data)) deallocate(data)
+  if (allocated(mapping)) deallocate(mapping) 
+  if (allocated(column)) deallocate (column)
   return
 end subroutine confsolv_request
 
diff --git a/src/algos/playground.f90 b/src/algos/playground.f90
index 6ec9f86f..4f379a26 100644
--- a/src/algos/playground.f90
+++ b/src/algos/playground.f90
@@ -37,6 +37,8 @@ subroutine crest_playground(env,tim)
   use adjacency
   use zdata
   use probabilities_module
+  use parse_csv
+  use cregen_interface
   implicit none
   type(systemdata),intent(inout) :: env
   type(timer),intent(inout)      :: tim
@@ -55,7 +57,7 @@ subroutine crest_playground(env,tim)
   type(zmolecule) :: zmol
 
   real(wp) :: energy
-  real(wp),allocatable :: grad(:,:),geo(:,:)
+  real(wp),allocatable :: grad(:,:),geo(:,:),csv(:,:)
   integer,allocatable :: na(:),nb(:),nc(:),at2(:)
   integer :: nat2
   real(wp),allocatable :: mu(:)
@@ -78,6 +80,7 @@ subroutine crest_playground(env,tim)
   write(*,*) 
 !========================================================================================!
 
+  call newcregen(env,0,'foobar.xyz') 
 
 !========================================================================================!
   call tim%stop(14)
diff --git a/src/algos/refine.f90 b/src/algos/refine.f90
index 8f8767fb..65e96e9c 100644
--- a/src/algos/refine.f90
+++ b/src/algos/refine.f90
@@ -30,6 +30,7 @@ subroutine crest_refine(env,input,output)
   use crest_data
   use crest_calculator
   use strucrd
+  use cregen_interface
   implicit none
   type(systemdata),intent(inout) :: env
   character(len=*),intent(in) :: input
@@ -54,6 +55,14 @@ subroutine crest_refine(env,input,output)
   else
     outname = input  !> overwrite
   end if
+
+!>--- presorting step, if necessary
+  if(env%refine_presort)then
+    call newcregen(env,0,input)
+    call rename('crest_ensemble.xyz',input)
+  endif
+
+!>--- read in
   call rdensemble(input,nat,nall,at,xyz,eread)
   allocate (etmp(nall),source=0.0_wp)
 !>>>>>>>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<<<<<<<<<<<!
@@ -91,11 +100,9 @@ subroutine crest_refine(env,input,output)
       case(refine%confsolv)
         write (stdout,'("> ConfSolv: ΔΔGsoln estimation from 3D directed message passing neural networks (D-MPNN)")')
         call confsolv_request( input, nall, env%threads, etmp, io)
-        !eread(:) = eread(:) + etmp(:)
         if(io == 0)then
         eread(:) = etmp(:)*kcaltoau  !> since CREGEN deals with Eh energies
         endif   
-
       end select
       write(stdout,*) 
     end do
diff --git a/src/algos/search_conformers.f90 b/src/algos/search_conformers.f90
index 2d82baf9..09d34f16 100644
--- a/src/algos/search_conformers.f90
+++ b/src/algos/search_conformers.f90
@@ -33,6 +33,7 @@ subroutine crest_search_imtdgc(env,tim)
   use shake_module
   use iomod
   use utilities
+  use cregen_interface
   implicit none
   type(systemdata),intent(inout) :: env
   type(timer),intent(inout)      :: tim
diff --git a/src/algos/search_entropy.f90 b/src/algos/search_entropy.f90
index 5c855809..028b81bc 100644
--- a/src/algos/search_entropy.f90
+++ b/src/algos/search_entropy.f90
@@ -32,6 +32,7 @@ subroutine crest_search_entropy(env,tim)
   use shake_module
   use iomod
   use utilities
+  use cregen_interface
   implicit none
   type(systemdata),intent(inout) :: env
   type(timer),intent(inout)      :: tim
diff --git a/src/algos/search_mecp.f90 b/src/algos/search_mecp.f90
index 096805cf..69b818cb 100644
--- a/src/algos/search_mecp.f90
+++ b/src/algos/search_mecp.f90
@@ -6,6 +6,7 @@ subroutine crest_search_mecp(env,tim)
   use strucrd
   use dynamics_module
   use shake_module
+  use cregen_interface
   implicit none
   type(systemdata),intent(inout) :: env
   type(timer),intent(inout)      :: tim
diff --git a/src/algos/search_newnci.f90 b/src/algos/search_newnci.f90
index 2d7a20b7..bc6b88c5 100644
--- a/src/algos/search_newnci.f90
+++ b/src/algos/search_newnci.f90
@@ -30,6 +30,7 @@ subroutine crest_search_newnci(env,tim)
   use shake_module
   use iomod
   use utilities
+  use cregen_interface
   implicit none
   type(systemdata),intent(inout) :: env
   type(timer),intent(inout)      :: tim
diff --git a/src/classes.f90 b/src/classes.f90
index 4d529941..26ce0c7e 100644
--- a/src/classes.f90
+++ b/src/classes.f90
@@ -537,6 +537,8 @@ module crest_data
     logical :: reweight = .false.    !> reweight structures on the fly after optimizations (i.e. do SPs)?
     logical :: riso = .false.        !> take only isomers in reactor mode
     logical :: rotamermds            !> do additional MDs after second  multilevel OPT step in V2 ?
+    logical :: refine_presort = .false.  !> run CREGEN at the beginning of crest_refine?
+    logical :: refine_esort   = .false.  !> if CREGEN is run after crest_refine, only sort energy?
     logical :: sameRandomNumber = .false. !> QCG related, choose same random number for iff
     logical :: scallen               !> scale the automatically determined MD length by some factor?
     logical :: scratch               !> use scratch directory
diff --git a/src/cregen.f90 b/src/cregen.f90
index e6a098c9..60683d4c 100644
--- a/src/cregen.f90
+++ b/src/cregen.f90
@@ -31,14 +31,40 @@
 !=========================================================================================!
 !=========================================================================================!
 
-subroutine newcregen(env,quickset)
+module cregen_interface
+!*******************************************************
+!* module to load an interface to the newcregen routine
+!* mandatory to handle the optional input arguments
+!*******************************************************
+  implicit none
+  interface
+    subroutine newcregen(env,quickset,infile)
+      use crest_parameters
+      use crest_data
+      use crest_restartlog
+      use strucrd
+      implicit none
+      type(systemdata),intent(inout) :: env
+      integer,intent(in),optional :: quickset
+      character(len=*),intent(in),optional :: infile
+    end subroutine newcregen
+  end interface
+end module cregen_interface
+
+subroutine newcregen(env,quickset,infile)
+!****************************
+!* The main CREGEN routine
+!****************************
   use crest_parameters
   use crest_data
   use crest_restartlog
   use strucrd
   implicit none
-  type(systemdata) :: env      !> MAIN STORAGE OS SYSTEM DATA
-  integer,optional :: quickset !> quick access to predefined CREGEN modes
+  !> INPUT
+  type(systemdata),intent(inout) :: env   !> MAIN STORAGE OS SYSTEM DATA
+  integer,intent(in),optional :: quickset !> quick access to predefined CREGEN modes
+  character(len=*),intent(in),optional :: infile
+  !> LOCAL
   integer :: simpleset
   character(len=258) :: fname  !> input file
   character(len=258) :: oname  !> sorted output file
@@ -61,14 +87,8 @@ subroutine newcregen(env,quickset)
   integer,allocatable :: degen(:,:)
 
 !>--- float data
-  real(wp) :: ewin
-  real(wp) :: rthr
-  real(wp) :: bthr
-  real(wp) :: pthr
-  real(wp) :: ethr
-  real(wp) :: athr
-  real(wp) :: T
-  real(wp) :: couthr
+  real(wp) :: ewin,rthr,bthr,pthr,ethr,athr
+  real(wp) :: T,couthr
 
 !>--- boolean data
   logical :: checkbroken
@@ -83,6 +103,7 @@ subroutine newcregen(env,quickset)
   logical :: bonusfiles
   logical :: anal
   logical :: saveelow = .true.
+  logical :: userinput
 
 !>--- printout directions
   integer :: prch  !> the main printout channel
@@ -101,9 +122,18 @@ subroutine newcregen(env,quickset)
   end if
 
 !>-- determine filenames and output channel
-  call cregen_files(env,fname,oname,cname,simpleset,prch)
+  if (present(infile)) then
+    fname = trim(infile)
+    userinput = .true.
+  else
+    fname = trim(env%ensemblename)
+    userinput = .false.
+  end if
+  call cregen_files(env,fname,oname,cname,simpleset,userinput,prch)
+
 !>-- determine which printouts are required
   call cregen_prout(env,simpleset,pr1,pr2,pr3,pr4)
+
 !>-- determine which subroutines are required
   call cregen_director(env,simpleset,checkbroken,sortE,sortRMSD,sortRMSD2, &
   &  repairord,newfile,conffile,bonusfiles,anal,topocheck,checkez)
@@ -275,7 +305,7 @@ end subroutine newcregen
 !=========================================================================================!
 !=========================================================================================!
 
-subroutine cregen_files(env,fname,oname,cname,simpleset,iounit)
+subroutine cregen_files(env,fname,oname,cname,simpleset,userinput,iounit)
 !*************************************************************
 !* subroutine cregen_files
 !* handle all settings regarding input and output file names
@@ -291,6 +321,7 @@ subroutine cregen_files(env,fname,oname,cname,simpleset,iounit)
   character(len=*) :: oname  !> output ensemble name (including rotamers)
   character(len=*) :: cname  !> output ensemble name (only conformers)
   integer,intent(in) :: simpleset
+  logical,intent(in) :: userinput !> was an input file given via the optional subroutine arg?
   integer,intent(out) :: iounit
   character(len=:),allocatable :: outfile
   logical :: ex
@@ -314,10 +345,12 @@ subroutine cregen_files(env,fname,oname,cname,simpleset,iounit)
     open (newunit=iounit,file=outfile)
   end if
 
-  fname = trim(env%ensemblename)
-  if (env%confgo.and.(index(trim(fname),'none selected') .eq. 0)) then
-    fname = trim(env%ensemblename)
-    oname = trim(env%ensemblename)//'.sorted'
+  if ((env%confgo.and.(index(trim(fname),'none selected') .eq. 0)) &
+  &    .OR.userinput) then
+    if (.not.userinput) then
+      fname = trim(env%ensemblename)
+    end if
+    oname = trim(fname)//'.sorted'
     cname = 'crest_ensemble.xyz'
     if (env%fullcre) then
       env%ensemblename = trim(oname)
@@ -414,18 +447,18 @@ subroutine cregen_director(env,simpleset,checkbroken,sortE,sortRMSD,sortRMSD2, &
   logical,intent(out) :: topocheck
   logical,intent(out) :: checkez
 
-  checkbroken = .true.  !fragmentized structures are sorted out
-  sortE = .true.  !sort based on energy
-  sortRMSD = .true.  !sort based on RMSD
-  sortRMSD2 = .false. !check groups for whole ensemble
-  repairord = .true.  !double-check the sorted Ensemble
+  checkbroken = .true. !> fragmentized structures are sorted out
+  sortE = .true.       !> sort based on energy
+  sortRMSD = .true.    !> sort based on RMSD
+  sortRMSD2 = .false.  !> check groups for whole ensemble
+  repairord = .true.   !> double-check the sorted Ensemble
 
-  newfile = .true.  !sorted input file
+  newfile = .true.  !> sorted input file
 
-  conffile = .true. !sorted unique structure file
+  conffile = .true. !> sorted unique structure file
 
-  topocheck = env%checktopo !topology is compared to reference structure
-  checkez = env%checkiso  !check for C=C cis/trans isomerizations
+  topocheck = env%checktopo !> topology is compared to reference structure
+  checkez = env%checkiso    !> check for C=C cis/trans isomerizations
   if (env%relax) then
     topocheck = .false.
   end if
@@ -514,10 +547,7 @@ subroutine cregen_filldata2(simpleset,ewin)
   implicit none
   integer,intent(in) :: simpleset
   real(wp),intent(out) :: ewin
-  if (simpleset == 6) then
-    ewin = 100000
-  end if
-  if (simpleset == 12) then
+  if (simpleset == 6.or.simpleset == 12) then
     ewin = huge(ewin)
   end if
   return
@@ -580,7 +610,7 @@ subroutine discardbroken(ch,env,topocheck,nat,nall,at,xyz,comments,newnall)
   real(wp),intent(inout) :: xyz(3,nat,nall)
   character(len=*),intent(inout) :: comments(nall)
   integer,intent(out) :: newnall
-  !> LOCAL 
+  !> LOCAL
   integer :: llan
   integer,allocatable :: order(:),orderref(:)
   integer :: nat0
@@ -841,9 +871,9 @@ subroutine cregen_topocheck(ch,env,checkez,nat,nall,at,xyz,comments,newnall)
 
 contains
   subroutine nezcc(nat,at,xyz,cn,ntopo,topo,ncc)
-  !***************************************************
-  !* Check how many (potential) C=C bonds are present
-  !***************************************************
+    !***************************************************
+    !* Check how many (potential) C=C bonds are present
+    !***************************************************
     use crest_parameters
     integer,intent(in)  :: nat
     integer,intent(in)  :: at(nat)
@@ -877,9 +907,9 @@ subroutine nezcc(nat,at,xyz,cn,ntopo,topo,ncc)
     return
   end subroutine nezcc
   subroutine ezccat(nat,at,xyz,cn,ntopo,topo,ncc,ezat)
-  !********************************************************
-  !* Check which atoms can be used for C=C dihedral angles
-  !********************************************************
+    !********************************************************
+    !* Check which atoms can be used for C=C dihedral angles
+    !********************************************************
     use crest_parameters
     integer,intent(in)  :: nat
     integer,intent(in)  :: at(nat)
@@ -935,9 +965,9 @@ subroutine ezccat(nat,at,xyz,cn,ntopo,topo,ncc,ezat)
     return
   end subroutine ezccat
   subroutine ezccdihed(nat,xyz,ncc,ezat,ezdihed)
-  !********************************************************
-  !* Check which atoms can be used for C=C dihedral angles
-  !********************************************************
+    !********************************************************
+    !* Check which atoms can be used for C=C dihedral angles
+    !********************************************************
     use crest_parameters
     integer,intent(in)  :: nat
     real(wp),intent(in) :: xyz(3,nat)
@@ -2068,7 +2098,7 @@ subroutine cregen_EQUAL(ch,nat,nall,at,xyz,group,athr,rotfil)
     if (k .gt. 2) then
       equiv(0,i,ig) = k    !> CH3 etc
     else
-      equiv(0,i,ig) = 1    !> this makes CH2-CH2 not mag. equiv. 
+      equiv(0,i,ig) = 1    !> this makes CH2-CH2 not mag. equiv.
     end if
   end do
   jnd = 1
@@ -2484,7 +2514,7 @@ end subroutine cregen_conffile
 
 subroutine cregen_rmsdalign(nat,nall,at,xyz)
 !*****************************************************
-!* Algin all structures in xyz to the first structure 
+!* Algin all structures in xyz to the first structure
 !* in the ensemble based on the heavy-atom RMSD
 !*****************************************************
   use crest_parameters,only:wp
diff --git a/src/crest_main.f90 b/src/crest_main.f90
index cce6f32f..1ec63ad6 100644
--- a/src/crest_main.f90
+++ b/src/crest_main.f90
@@ -111,7 +111,10 @@ program CREST
       end if
     end if
     if (env%newcregen) then
+      block
+      use cregen_interface
       call newcregen(env,0)
+      end block
     else
       call cregen2(env)
     end if
diff --git a/src/ensemblecomp.f90 b/src/ensemblecomp.f90
index a68ce050..06364218 100644
--- a/src/ensemblecomp.f90
+++ b/src/ensemblecomp.f90
@@ -23,361 +23,360 @@
 !   crest --compare <ensemble1> <ensemble2>
 !================================================================================!
 subroutine compare_ensembles(env)
-         use iso_fortran_env, only : wp => real64
-         use ls_rmsd
-         use iomod
-         use crest_data
-         use strucrd, only: rdensembleparam,rdensemble
-         implicit none
-
-         type(systemdata) :: env
-
-         integer :: i,j,k,l,kk,ll
-         integer :: be,ed
-         integer :: tr1,tr2
-         integer :: nat
-         integer :: dum
-         integer :: nat1,nat2
-         integer :: nall1,nall2
-         integer :: nconf1,nconf2
-         integer :: nrot1,nrot2
-         integer :: rcount
-         integer :: ich,ich2
-
-         real(wp) :: rthr
-         real(wp) :: rval
-         real(wp) :: min_1,min_2,min_tot
-
-         real(wp),allocatable :: gdum(:,:),Udum(:,:),xdum(:), ydum(:)  ! rmsd dummy stuff
-         real(wp),allocatable :: rmat(:,:),rmat2(:)
-         real(wp),allocatable :: c1(:,:),c2(:,:)
-         real(wp),allocatable :: xyz1(:,:,:),xyz2(:,:,:)
-         real(wp),allocatable :: conf1(:,:,:),conf2(:,:,:)
-         real(wp),allocatable :: eread1(:),eread2(:),en1(:),en2(:)
-         integer,allocatable  :: at1(:),at2(:) 
-         integer,allocatable  :: b1(:),b2(:)
-         integer,allocatable  :: e1(:),e2(:)
-         integer,allocatable  :: rots1(:),rots2(:)
-
-         character(len=128) :: ensname1,ensname1backup
-         character(len=128) :: ensname2
-         character(len=60)  :: track1,track2
-         character(len=6)   :: connect
-
-         logical :: matching,ex
-
-         real(wp),parameter :: autokcal = 627.509541d0
-
-         associate( ensemblename => env%ensemblename, ensemblename2 => env%ensemblename2, &
-         & thresholds => env%thresholds, cgf => env%cgf, maxcompare => env%maxcompare)
-         nat = env%nat
+  use iso_fortran_env,only:wp => real64
+  use ls_rmsd
+  use iomod
+  use crest_data
+  use strucrd,only:rdensembleparam,rdensemble
+  use cregen_interface
+  implicit none
+
+  type(systemdata) :: env
+
+  integer :: i,j,k,l,kk,ll
+  integer :: be,ed
+  integer :: tr1,tr2
+  integer :: nat
+  integer :: dum
+  integer :: nat1,nat2
+  integer :: nall1,nall2
+  integer :: nconf1,nconf2
+  integer :: nrot1,nrot2
+  integer :: rcount
+  integer :: ich,ich2
+
+  real(wp) :: rthr
+  real(wp) :: rval
+  real(wp) :: min_1,min_2,min_tot
+
+  real(wp),allocatable :: gdum(:,:),Udum(:,:),xdum(:),ydum(:)  ! rmsd dummy stuff
+  real(wp),allocatable :: rmat(:,:),rmat2(:)
+  real(wp),allocatable :: c1(:,:),c2(:,:)
+  real(wp),allocatable :: xyz1(:,:,:),xyz2(:,:,:)
+  real(wp),allocatable :: conf1(:,:,:),conf2(:,:,:)
+  real(wp),allocatable :: eread1(:),eread2(:),en1(:),en2(:)
+  integer,allocatable  :: at1(:),at2(:)
+  integer,allocatable  :: b1(:),b2(:)
+  integer,allocatable  :: e1(:),e2(:)
+  integer,allocatable  :: rots1(:),rots2(:)
+
+  character(len=128) :: ensname1,ensname1backup
+  character(len=128) :: ensname2
+  character(len=60)  :: track1,track2
+  character(len=6)   :: connect
+
+  logical :: matching,ex
+
+  real(wp),parameter :: autokcal = 627.509541d0
+
+  associate (ensemblename => env%ensemblename,ensemblename2 => env%ensemblename2, &
+  & thresholds => env%thresholds,cgf => env%cgf,maxcompare => env%maxcompare)
+    nat = env%nat
 
 !----
-      call compens_cleanup()
+    call compens_cleanup()
 
 !---- check if two valid files were given
-      inquire(file=ensemblename,exist=ex)
-      if(.not.ex)then
-        write(*,'(2x,a,a,a)')'File <',trim(ensemblename),'> does not exist!'
-        error stop
-      endif
-      inquire(file=ensemblename2,exist=ex)
-      if(.not.ex)then
-        write(*,'(2x,a,a,a)')'File <',trim(ensemblename2),'> does not exist!'
-        error stop
-      endif
-
-         
+    inquire (file=ensemblename,exist=ex)
+    if (.not.ex) then
+      write (*,'(2x,a,a,a)') 'File <',trim(ensemblename),'> does not exist!'
+      error stop
+    end if
+    inquire (file=ensemblename2,exist=ex)
+    if (.not.ex) then
+      write (*,'(2x,a,a,a)') 'File <',trim(ensemblename2),'> does not exist!'
+      error stop
+    end if
+
 !---- settings
-      allocate(gdum(3,3),Udum(3,3),xdum(3),ydum(3))
-      rthr = env%rthr      ! RMSD threshold
+    allocate (gdum(3,3),Udum(3,3),xdum(3),ydum(3))
+    rthr = env%rthr      ! RMSD threshold
 
-      ensname1='ensemble1.inp.xyz'
-      ensname2='ensemble2.inp.xyz'
-      track1='track.1'
-      track2='track.2'
+    ensname1 = 'ensemble1.inp.xyz'
+    ensname2 = 'ensemble2.inp.xyz'
+    track1 = 'track.1'
+    track2 = 'track.2'
 
 !---- sort the input files with CREGEN
-      env%confgo=.true. !needs to be activated here     
+    env%confgo = .true. !needs to be activated here
 !----- first file
-      call smallhead('Sorting file <'//trim(ensemblename)//'>')
-      ensname1backup=trim(ensemblename)
-
-          call newcregen(env,0)
-
-      call rename(trim(ensemblename)//'.sorted',trim(ensname1))
-      call rename('.cretrack',track1)
-      call rdensembleparam(trim(ensname1),nat1,nall1)
-      allocate(xyz1(3,nat1,nall1),eread1(nall1),at1(nat1))
-      call rdensemble(trim(ensname1),nat1,nall1,at1,xyz1,eread1)
-   !---- read tracking information (which conformer has which rotamers from the files?)
-      open(newunit=tr1,file=track1)
-      read(tr1,*) nconf1  !ensemble 1 has "nconf1" conformers
-      !to compare a limited number "maxcompare" conformers, "nconf1/2" is normalized to maxcompare
-      !i.e. if nconf > maxcompare, nconf is set to maxcompare; if nconf < maxcompare, it is not changed
-      dum=nconf1
-      nconf1=min(nconf1,maxcompare)
-   !---- set up the mapping
-      allocate(b1(nconf1),e1(nconf1),rots1(nconf1))
-      do i=1,nconf1
-         read(tr1,*)l,b1(i),e1(i)
-         rots1(i)=e1(i) - b1(i) + 1
-      enddo
-      close(tr1)
-      write(*,'(2x,a,a,a,i0,a)')'File <',trim(ensemblename),'> contains ',dum,' conformers.'
-      if(dum.lt.maxcompare)then
-         write(*,'(2x,a,i0,a)')'All of the ',dum,' conformers will be taken for the comparison:'
-      else
-         write(*,'(2x,a,i0,a)')'The ',nconf1,' lowest conformers will be taken for the comparison:'
-      endif
-      write(*,'(2x,a9,2x,a9)')'conformer','#rotamers'
-      do i=1,nconf1
-       write(*,'(i9,2x,i9)')i,rots1(i)
-      enddo
-      write(*,*)
+    call smallhead('Sorting file <'//trim(ensemblename)//'>')
+    ensname1backup = trim(ensemblename)
+
+    call newcregen(env,0)
+
+    call rename(trim(ensemblename)//'.sorted',trim(ensname1))
+    call rename('.cretrack',track1)
+    call rdensembleparam(trim(ensname1),nat1,nall1)
+    allocate (xyz1(3,nat1,nall1),eread1(nall1),at1(nat1))
+    call rdensemble(trim(ensname1),nat1,nall1,at1,xyz1,eread1)
+    !---- read tracking information (which conformer has which rotamers from the files?)
+    open (newunit=tr1,file=track1)
+    read (tr1,*) nconf1  !ensemble 1 has "nconf1" conformers
+    !to compare a limited number "maxcompare" conformers, "nconf1/2" is normalized to maxcompare
+    !i.e. if nconf > maxcompare, nconf is set to maxcompare; if nconf < maxcompare, it is not changed
+    dum = nconf1
+    nconf1 = min(nconf1,maxcompare)
+    !---- set up the mapping
+    allocate (b1(nconf1),e1(nconf1),rots1(nconf1))
+    do i = 1,nconf1
+      read (tr1,*) l,b1(i),e1(i)
+      rots1(i) = e1(i)-b1(i)+1
+    end do
+    close (tr1)
+    write (*,'(2x,a,a,a,i0,a)') 'File <',trim(ensemblename),'> contains ',dum,' conformers.'
+    if (dum .lt. maxcompare) then
+      write (*,'(2x,a,i0,a)') 'All of the ',dum,' conformers will be taken for the comparison:'
+    else
+      write (*,'(2x,a,i0,a)') 'The ',nconf1,' lowest conformers will be taken for the comparison:'
+    end if
+    write (*,'(2x,a9,2x,a9)') 'conformer','#rotamers'
+    do i = 1,nconf1
+      write (*,'(i9,2x,i9)') i,rots1(i)
+    end do
+    write (*,*)
 
 !----- second file
-      ensemblename=ensemblename2 !done so that CREGEn sorts the correct file
-      call smallhead('Sorting file <'//trim(ensemblename2)//'>')
+    ensemblename = ensemblename2 !done so that CREGEn sorts the correct file
+    call smallhead('Sorting file <'//trim(ensemblename2)//'>')
 
-          call newcregen(env,0)
+    call newcregen(env,0)
 
-      ensemblename=trim(ensname1backup)
-      call rename(trim(ensemblename2)//'.sorted',trim(ensname2))
-      call rename('.cretrack',track2)
-      call rdensembleparam(trim(ensname2),nat2,nall2)
-      allocate(xyz2(3,nat2,nall2),eread2(nall2),at2(nat2))
-      call rdensemble(trim(ensname2),nat2,nall2,at2,xyz2,eread2)
+    ensemblename = trim(ensname1backup)
+    call rename(trim(ensemblename2)//'.sorted',trim(ensname2))
+    call rename('.cretrack',track2)
+    call rdensembleparam(trim(ensname2),nat2,nall2)
+    allocate (xyz2(3,nat2,nall2),eread2(nall2),at2(nat2))
+    call rdensemble(trim(ensname2),nat2,nall2,at2,xyz2,eread2)
 !---- read tracking information (which conformer has which rotamers from the files?)
-      open(newunit=tr2,file=track2)
-      read(tr2,*) nconf2  !ensemble 2 has "nconf2" conformers
-      !to compare a limited number "maxcompare" conformers, "nconf1/2" is normalized to maxcompare
-      !i.e. if nconf > maxcompare, nconf is set to maxcompare; if nconf < maxcompare, it is not changed
-      dum=nconf2
-      nconf2=min(nconf2,maxcompare)
+    open (newunit=tr2,file=track2)
+    read (tr2,*) nconf2  !ensemble 2 has "nconf2" conformers
+    !to compare a limited number "maxcompare" conformers, "nconf1/2" is normalized to maxcompare
+    !i.e. if nconf > maxcompare, nconf is set to maxcompare; if nconf < maxcompare, it is not changed
+    dum = nconf2
+    nconf2 = min(nconf2,maxcompare)
 !-----set up the mapping
-      allocate(b2(nconf2),e2(nconf2),rots2(nconf2))
-      do i=1,nconf2
-         read(tr2,*)l,b2(i),e2(i)
-         rots2(i)=e2(i) - b2(i) + 1
-      enddo
-      close(tr2)
-      write(*,'(2x,a,a,a,i0,a)')'File <',trim(ensemblename2),'> contains ',dum,' conformers.'
-      if(dum.lt.maxcompare)then
-         write(*,'(2x,a,i0,a)')'All of the ',dum,' conformers will be taken for the comparison:'
-      else
-         write(*,'(2x,a,i0,a)')'The ',nconf2,' lowest conformers will be taken for the comparison:'
-      endif
-      write(*,'(2x,a9,2x,a9)')'conformer','#rotamers'
-      do i=1,nconf2
-       write(*,'(i9,2x,i9)')i,rots2(i)
-      enddo
-      write(*,*)
-      
+    allocate (b2(nconf2),e2(nconf2),rots2(nconf2))
+    do i = 1,nconf2
+      read (tr2,*) l,b2(i),e2(i)
+      rots2(i) = e2(i)-b2(i)+1
+    end do
+    close (tr2)
+    write (*,'(2x,a,a,a,i0,a)') 'File <',trim(ensemblename2),'> contains ',dum,' conformers.'
+    if (dum .lt. maxcompare) then
+      write (*,'(2x,a,i0,a)') 'All of the ',dum,' conformers will be taken for the comparison:'
+    else
+      write (*,'(2x,a,i0,a)') 'The ',nconf2,' lowest conformers will be taken for the comparison:'
+    end if
+    write (*,'(2x,a9,2x,a9)') 'conformer','#rotamers'
+    do i = 1,nconf2
+      write (*,'(i9,2x,i9)') i,rots2(i)
+    end do
+    write (*,*)
+
 !---- run some checks that are mandatory in order for the compariso to work properly
-      if(nat1 /= nat2)then
-        write(*,'(a)') "Nat1 /= Nat2 : Number of atoms of the two ensembles don't match!"
-        write(*,'(a)') "You are trying to compare two different molecules!"
+    if (nat1 /= nat2) then
+      write (*,'(a)') "Nat1 /= Nat2 : Number of atoms of the two ensembles don't match!"
+      write (*,'(a)') "You are trying to compare two different molecules!"
+      error stop "exit."
+    else
+      nat = nat1
+    end if
+    do i = 1,nat
+      if (at1(i) /= at2(i)) then
+        write (*,'(a)') "The ordering of atoms apparently is different between the two ensembles!"
+        write (*,'(a)') "This way it is impossible to calculate RMSDs!"
         error stop "exit."
-      else
-        nat = nat1
-      endif
-      do i=1,nat
-        if(at1(i) /= at2(i))then
-          write(*,'(a)') "The ordering of atoms apparently is different between the two ensembles!"
-          write(*,'(a)') "This way it is impossible to calculate RMSDs!"
-          error stop "exit."
-        endif
-      enddo
+      end if
+    end do
 
 !---- set the threads for the RMSD calculation (OMP parallel)
-       if(env%autothreads)then
-         call ompautoset(env%threads,4,env%omp,env%MAXRUN,0) !mode=4 --> Program intern Threads max
-       endif
+    if (env%autothreads) then
+      call ompautoset(env%threads,4,env%omp,env%MAXRUN,0) !mode=4 --> Program intern Threads max
+    end if
 
 !---- printout
-      call smallhead('Comparing the Ensembles')
+    call smallhead('Comparing the Ensembles')
 
 !----- set up some more arrays
-      allocate(rmat(nconf1,nconf2),c1(3,nat),c2(3,nat),en1(nconf1),en2(nconf2))
+    allocate (rmat(nconf1,nconf2),c1(3,nat),c2(3,nat),en1(nconf1),en2(nconf2))
 !---- get energies of lowest conformers
-      do i=1,nconf1
-         en1(i)=eread1(b1(i))
-      enddo
-      do i=1,nconf2
-         en2(i)=eread2(b2(i))
-      enddo
-      deallocate(eread2,eread1)
-
-      write(*,'(1x,a)',advance='no')'Calculating RMSDs between conformers...'
-!-----compare conformer-block-wise, i.e. all rotamers of each conformer pair of ensemble 1 and 2     
+    do i = 1,nconf1
+      en1(i) = eread1(b1(i))
+    end do
+    do i = 1,nconf2
+      en2(i) = eread2(b2(i))
+    end do
+    deallocate (eread2,eread1)
+
+    write (*,'(1x,a)',advance='no') 'Calculating RMSDs between conformers...'
+!-----compare conformer-block-wise, i.e. all rotamers of each conformer pair of ensemble 1 and 2
 !----- THIS IS THE MAIN LOOP OF THE SUBROUTINE
-      do i=1,nconf1
-          be=b1(i)
-          ed=e1(i)
-          nrot1=ed - be + 1
-          !write(*,*) b,e,nrot1
-          allocate(conf1(3,nat,nrot1))
-          conf1(1:3,1:nat,1:nrot1)=xyz1(1:3,1:nat,be:ed)
-          do j=1,nconf2
-             be=b2(j)
-             ed=e2(j)
-             nrot2=ed - be + 1
-             allocate(conf2(3,nat,nrot2))
-             conf2(1:3,1:nat,1:nrot2)=xyz2(1:3,1:nat,be:ed)
-             rcount=nrot1*nrot2 !total number of RMSDs between
-             allocate(rmat2(rcount))
-             rmat2=100 !<--only done so we can easily get the minimum rmsd later
-             
-             do k=1,nrot1
-                c1(1:3,1:nat)=conf1(1:3,1:nat,k)
+    do i = 1,nconf1
+      be = b1(i)
+      ed = e1(i)
+      nrot1 = ed-be+1
+      !write(*,*) b,e,nrot1
+      allocate (conf1(3,nat,nrot1))
+      conf1(1:3,1:nat,1:nrot1) = xyz1(1:3,1:nat,be:ed)
+      do j = 1,nconf2
+        be = b2(j)
+        ed = e2(j)
+        nrot2 = ed-be+1
+        allocate (conf2(3,nat,nrot2))
+        conf2(1:3,1:nat,1:nrot2) = xyz2(1:3,1:nat,be:ed)
+        rcount = nrot1*nrot2 !total number of RMSDs between
+        allocate (rmat2(rcount))
+        rmat2 = 100 !<--only done so we can easily get the minimum rmsd later
+
+        do k = 1,nrot1
+          c1(1:3,1:nat) = conf1(1:3,1:nat,k)
 !$omp parallel private (l,rcount,c2,ydum,xdum,Udum,gdum) &
 !$omp shared (k,c1,rmat2,nat,conf2,nrot2)
 !$omp do
-                do l=1,nrot2
-                   rcount=((k-1)*nrot2) + l
-                   c2(1:3,1:nat)=conf2(1:3,1:nat,l) 
-                   call rmsd(nat,c1,c2,0,Udum,xdum,ydum,rmat2(rcount),.false.,gdum) ! all atoms
-                   !write(*,*)rmat2(rcount)
-                enddo
+          do l = 1,nrot2
+            rcount = ((k-1)*nrot2)+l
+            c2(1:3,1:nat) = conf2(1:3,1:nat,l)
+            call rmsd(nat,c1,c2,0,Udum,xdum,ydum,rmat2(rcount),.false.,gdum) ! all atoms
+            !write(*,*)rmat2(rcount)
+          end do
 !$omp end do
-!$omp end parallel 
-             enddo
-             rmat(i,j)=minval(rmat2)
-             deallocate(rmat2,conf2)
-          enddo
-          deallocate(conf1)
-      enddo
-      write(*,'(1x,a)')'done.'
+!$omp end parallel
+        end do
+        rmat(i,j) = minval(rmat2)
+        deallocate (rmat2,conf2)
+      end do
+      deallocate (conf1)
+    end do
+    write (*,'(1x,a)') 'done.'
 !---------
-       !--- print the RMSD matrix
-       write(*,'(1x,a,f8.4,a)') 'RMSD threshold:',rthr,' Å'
-       call pr_rmat(rmat,nconf1,nconf2)
+    !--- print the RMSD matrix
+    write (*,'(1x,a,f8.4,a)') 'RMSD threshold:',rthr,' Å'
+    call pr_rmat(rmat,nconf1,nconf2)
 !--------
-       write(*,*)
-       call smallhead('Correlation between Conformers :')
-       write(*,'(4x,a,5x,a10,13x,a,4x,a10)')'#','Ensemble A','#','Ensemble B'
-       dum=nconf1+nconf2
-       kk=nconf1
-       ll=nconf2
-       connect='<---->'
-       do 
-          matching=.false.
-          if(kk.gt.0.and.ll.gt.0)then
-          min_1=en1(kk)
-          min_2=en2(ll)
-          rval=rmat(kk,ll)
-           if(rval.le.rthr)then  !<--- two matching conformers from ensembles
-             write(*,'(1x,i4,1x,f14.5,3x,a,1x,i4,f14.5)')kk,min_1,connect,ll,min_2
-             kk=kk-1
-             ll=ll-1
-             matching=.true.
-           endif
-          endif
-          if(.not.matching)then
-            if(kk.gt.0.and.ll.gt.0)then
-               min_1=en1(kk)
-               min_2=en2(ll)
-               if(min_1.ge.min_2)then
-                 write(*,'(1x,i4,1x,f14.5)')kk,min_1
-                 kk=kk-1
-               else
-                 write(*,'(30x,i4,f14.5)')ll,min_2
-                 ll=ll-1
-               endif
-            endif
-            if(kk.gt.0.and.ll.eq.0)then
-               min_1=en1(kk)
-               write(*,'(1x,i4,1x,f14.5)')kk,min_1
-               kk=kk-1
-            endif
-            if(kk.eq.0.and.ll.gt.0)then
-               min_2=en2(ll)
-               write(*,'(30x,i4,f14.5)')ll,min_2               
-               ll=ll-1
-            endif
-          endif
-         if(kk.eq.0.and.ll.eq.0)exit
-       enddo
+    write (*,*)
+    call smallhead('Correlation between Conformers :')
+    write (*,'(4x,a,5x,a10,13x,a,4x,a10)') '#','Ensemble A','#','Ensemble B'
+    dum = nconf1+nconf2
+    kk = nconf1
+    ll = nconf2
+    connect = '<---->'
+    do
+      matching = .false.
+      if (kk .gt. 0.and.ll .gt. 0) then
+        min_1 = en1(kk)
+        min_2 = en2(ll)
+        rval = rmat(kk,ll)
+        if (rval .le. rthr) then  !<--- two matching conformers from ensembles
+          write (*,'(1x,i4,1x,f14.5,3x,a,1x,i4,f14.5)') kk,min_1,connect,ll,min_2
+          kk = kk-1
+          ll = ll-1
+          matching = .true.
+        end if
+      end if
+      if (.not.matching) then
+        if (kk .gt. 0.and.ll .gt. 0) then
+          min_1 = en1(kk)
+          min_2 = en2(ll)
+          if (min_1 .ge. min_2) then
+            write (*,'(1x,i4,1x,f14.5)') kk,min_1
+            kk = kk-1
+          else
+            write (*,'(30x,i4,f14.5)') ll,min_2
+            ll = ll-1
+          end if
+        end if
+        if (kk .gt. 0.and.ll .eq. 0) then
+          min_1 = en1(kk)
+          write (*,'(1x,i4,1x,f14.5)') kk,min_1
+          kk = kk-1
+        end if
+        if (kk .eq. 0.and.ll .gt. 0) then
+          min_2 = en2(ll)
+          write (*,'(30x,i4,f14.5)') ll,min_2
+          ll = ll-1
+        end if
+      end if
+      if (kk .eq. 0.and.ll .eq. 0) exit
+    end do
 
 !----- plotfiles
-       !--- convert to relative energies
-       min_1=minval(en1)
-       min_2=minval(en2)
-       min_tot=min(min_1,min_2)
-       !en1=(en1-min_tot)*autokcal
-       !en2=(en2-min_tot)*autokcal
-       !--- energies of ensemble 1
-       open(newunit=ich,file='energy_1.dat')
-       do i=1,nconf1
-          write(ich,'(2x,f10.5,2x,f14.8)') (en1(i)-min_tot)*autokcal,en1(i)
-       enddo
-       close(ich)
-       !--- energies of ensemble 2
-       open(newunit=ich,file='energy_2.dat')
-       do i=1,nconf2
-          write(ich,'(2x,f10.5,2x,f14.8)') (en2(i)-min_tot)*autokcal,en2(i)
-       enddo
-       close(ich)
-       !--- correlation between conformers of the two ensembles
-       open(newunit=ich2,file='rmsdmatch.dat')
-       do i=1,nconf1
-          do j=1,nconf2
-             rval=rmat(i,j)
-             if(rval.le.rthr)then
-                write(ich2,'(2x,i6,i6)')i,j
-             endif
-          enddo
-       enddo
-       close(ich2) 
-
+    !--- convert to relative energies
+    min_1 = minval(en1)
+    min_2 = minval(en2)
+    min_tot = min(min_1,min_2)
+    !en1=(en1-min_tot)*autokcal
+    !en2=(en2-min_tot)*autokcal
+    !--- energies of ensemble 1
+    open (newunit=ich,file='energy_1.dat')
+    do i = 1,nconf1
+      write (ich,'(2x,f10.5,2x,f14.8)') (en1(i)-min_tot)*autokcal,en1(i)
+    end do
+    close (ich)
+    !--- energies of ensemble 2
+    open (newunit=ich,file='energy_2.dat')
+    do i = 1,nconf2
+      write (ich,'(2x,f10.5,2x,f14.8)') (en2(i)-min_tot)*autokcal,en2(i)
+    end do
+    close (ich)
+    !--- correlation between conformers of the two ensembles
+    open (newunit=ich2,file='rmsdmatch.dat')
+    do i = 1,nconf1
+      do j = 1,nconf2
+        rval = rmat(i,j)
+        if (rval .le. rthr) then
+          write (ich2,'(2x,i6,i6)') i,j
+        end if
+      end do
+    end do
+    close (ich2)
 
 !-----
-      deallocate(rmat,c1,c2,en1,en2)      
-      deallocate(b1,e1,b2,e2)
-      deallocate(xyz2,xyz1,at2,at1)
-      deallocate(ydum,xdum,Udum,gdum)
+    deallocate (rmat,c1,c2,en1,en2)
+    deallocate (b1,e1,b2,e2)
+    deallocate (xyz2,xyz1,at2,at1)
+    deallocate (ydum,xdum,Udum,gdum)
 !-----
-      call compens_cleanup()
+    call compens_cleanup()
 
-      end associate
+  end associate
 
 end subroutine compare_ensembles
 
 !---------------------------------------------------------------------------------------
 subroutine compens_cleanup()
-         use iso_fortran_env, only : wp => real64
-         use iomod
-         implicit none
-         call remove('scoord.1')
-         call remove('ensemble1.inp.xyz')
-         call remove('ensemble2.inp.xyz')
-         call remove('track.1')
-         call remove('track.2')
-         call remove('crest_best.xyz')
-         call remove('cregen.out.tmp')
+  use iso_fortran_env,only:wp => real64
+  use iomod
+  implicit none
+  call remove('scoord.1')
+  call remove('ensemble1.inp.xyz')
+  call remove('ensemble2.inp.xyz')
+  call remove('track.1')
+  call remove('track.2')
+  call remove('crest_best.xyz')
+  call remove('cregen.out.tmp')
 end subroutine compens_cleanup
 
 !--------------------------------------------------------------------------------------
 subroutine pr_rmat(rmat,acon,bcon)
-         use iso_fortran_env, only : wp => real64
-         implicit none
-         integer :: acon,bcon
-         real(wp) :: rmat(acon,bcon)         
-         integer :: i,j
-         write(*,*)
-         write(*,*) 'RMSD matrix:'
-         write(*,'(2x,a9)',advance='no')'conformer'
-         do j=1,bcon
-            write(*,'(1x,i10)',advance='no')j
-         enddo
-         write(*,*)
-         do i=1,acon
-            write(*,'(1x,i5,5x)',advance='no')i
-            do j=1,bcon
-               write(*,'(1x,f10.5)',advance='no')rmat(i,j)
-            enddo
-            write(*,*)
-         enddo
+  use iso_fortran_env,only:wp => real64
+  implicit none
+  integer :: acon,bcon
+  real(wp) :: rmat(acon,bcon)
+  integer :: i,j
+  write (*,*)
+  write (*,*) 'RMSD matrix:'
+  write (*,'(2x,a9)',advance='no') 'conformer'
+  do j = 1,bcon
+    write (*,'(1x,i10)',advance='no') j
+  end do
+  write (*,*)
+  do i = 1,acon
+    write (*,'(1x,i5,5x)',advance='no') i
+    do j = 1,bcon
+      write (*,'(1x,f10.5)',advance='no') rmat(i,j)
+    end do
+    write (*,*)
+  end do
 end subroutine pr_rmat
 
diff --git a/src/filemod.f90 b/src/filemod.f90
index 3350c1c7..15e437d3 100644
--- a/src/filemod.f90
+++ b/src/filemod.f90
@@ -415,7 +415,7 @@ function lwidth(fname)
     integer :: lwidth
     character(len=*) :: fname
     integer :: ich,io
-    character(len=1024) :: str
+    character(len=5000) :: str
     open (newunit=ich,file=fname)
     lwidth = 0
     do
diff --git a/src/legacy_algos/confscript2_main.f90 b/src/legacy_algos/confscript2_main.f90
index fa50c728..204b9430 100644
--- a/src/legacy_algos/confscript2_main.f90
+++ b/src/legacy_algos/confscript2_main.f90
@@ -27,6 +27,7 @@ subroutine confscript2i_legacy(env,tim)
   use iomod
   use strucrd,only:coord2xyz,xyz2coord
   use utilities
+  use cregen_interface
   implicit none
   type(systemdata) :: env
   type(timer)   :: tim
diff --git a/src/legacy_algos/confscript2_misc.f90 b/src/legacy_algos/confscript2_misc.f90
index b3431629..79767f6a 100644
--- a/src/legacy_algos/confscript2_misc.f90
+++ b/src/legacy_algos/confscript2_misc.f90
@@ -829,6 +829,7 @@ subroutine cross3(env)
   use crest_data
   use iomod
   use utilities
+  use cregen_interface
   implicit none
   type(systemdata) :: env    ! MAIN STORAGE OS SYSTEM DATA
   real(wp) :: ewinbackup
@@ -899,6 +900,7 @@ end subroutine cross3
 !---------------------------------------------------------------------
 subroutine confg_chk3(env)
   use crest_data
+  use cregen_interface 
   implicit none
   type(systemdata) :: env    !> MAIN SYSTEM DATA
 
@@ -1190,6 +1192,7 @@ subroutine emtdcopy(env,iter,stopiter,broken)
   use iomod
   use strucrd
   use utilities 
+  use cregen_interface
   implicit none
   type(systemdata) :: env
   integer :: iter,iter2
diff --git a/src/legacy_algos/confscript3.f90 b/src/legacy_algos/confscript3.f90
index a7430609..7f999b42 100644
--- a/src/legacy_algos/confscript3.f90
+++ b/src/legacy_algos/confscript3.f90
@@ -25,6 +25,7 @@ subroutine mdopt_legacy(env,tim)
   use iomod
   use strucrd,only:rdensembleparam,rdensemble
   use utilities
+  use cregen_interface
   implicit none
 
   type(systemdata) :: env
diff --git a/src/legacy_algos/relaxensemble.f90 b/src/legacy_algos/relaxensemble.f90
index def74568..8cca12ee 100644
--- a/src/legacy_algos/relaxensemble.f90
+++ b/src/legacy_algos/relaxensemble.f90
@@ -28,6 +28,7 @@ subroutine relaxensemble(fname,env,tim)
       use crest_data
       use strucrd
       use iomod
+      use cregen_interface
       implicit none
 
       character(len=*) :: fname
diff --git a/src/parsing/parse_csv.f90 b/src/parsing/parse_csv.f90
index 6d6cc153..1ef1f618 100644
--- a/src/parsing/parse_csv.f90
+++ b/src/parsing/parse_csv.f90
@@ -46,6 +46,16 @@ module parse_csv
     module procedure :: parse_csv_columnnumber_real
   end interface parse_csv_column_real
 
+  public :: parse_csv_allcolumns
+  interface parse_csv_allcolumns
+    module procedure :: parse_csv_allcolumns_real
+  end interface parse_csv_allcolumns
+
+  public :: parse_csv_file_row
+  interface parse_csv_file_row
+    module procedure :: parse_csv_file_rownumber
+  end interface parse_csv_file_row
+
 !========================================================================================!
 !========================================================================================!
 contains  !> MODULE PROCEDURES START HERE
@@ -125,9 +135,47 @@ subroutine parse_csv_file_columnnumber(fname,getcol,column)
         if (debug) write (*,*) trim(column(i))
       end do
     end if
+    call file%close()
     !if (debug) stop
   end subroutine parse_csv_file_columnnumber
 
+
+  subroutine parse_csv_file_rownumber(fname,getrow,row)
+!*********************************************
+!* Routine for parsing the csv file fname
+!* and get a column as array of strings
+!*********************************************
+    implicit none
+    character(len=*),intent(in) :: fname
+    integer,intent(in) :: getrow
+    character(len=:),intent(out),allocatable :: row(:)
+    logical :: ex
+    character(len=:),allocatable :: hdr
+    integer :: i,j,k,l,nrow,ncol
+    type(filetype) :: file
+
+    inquire (file=fname,exist=ex)
+    if (.not.ex) return
+    call file%open(fname)
+
+    call csv_params(file,nrow,ncol)
+    if (debug) write (*,*) 'nrow',nrow
+    if (debug) write (*,*) 'ncol',ncol
+    l = file%lwidth
+    allocate (row(nrow),source=repeat(' ',l))
+
+    if (debug) write (*,*) 'trying to get row elements',getrow
+    if (getrow > 0.and.getrow <= nrow) then
+      do i = 1,ncol
+        !if (debug) write(*,*) file%line(i)
+        row(i) = get_column_element(file%line(getrow),i)
+        if (debug) write (*,*) trim(row(i))
+      end do
+    end if
+    call file%close()
+    !if (debug) stop
+ end subroutine parse_csv_file_rownumber
+
 !========================================================================================!
 
   subroutine parse_csv_columnname_real(fname,header,column)
@@ -189,6 +237,57 @@ subroutine parse_csv_columnnumber_real(fname,getcol,column)
     if (debug) stop
   end subroutine parse_csv_columnnumber_real
 
+
+  subroutine parse_csv_allcolumns_real(fname,columns,cols,rows)
+!*********************************************
+!* Routine for parsing the csv file fname
+!* and get a matrix of all columns/rows
+!*********************************************
+    implicit none
+    character(len=*),intent(in) :: fname
+    real(wp),intent(out),allocatable :: columns(:,:)
+    integer,intent(out),optional :: cols,rows
+    character(len=:),allocatable :: strcolumn
+    type(filetype) :: file
+    logical :: ex
+    integer :: getcol,i,j,k,l,io,nrow,ncol
+    real(wp) :: dum
+
+    inquire (file=fname,exist=ex)
+    if (.not.ex) return
+    call file%open(fname)
+
+    call csv_params(file,nrow,ncol)
+    if (debug) write (*,*) 'nrow',nrow
+    if (debug) write (*,*) 'ncol',ncol
+    l = file%lwidth
+    strcolumn=repeat(' ',l)
+    allocate (columns(ncol,nrow-1), source=0.0_wp)
+
+    if (debug) write (*,*) 'trying to get column',getcol
+    do i = 2,nrow
+      k = i-1 !> to skip header
+      do getcol=1,ncol
+        strcolumn = get_column_element(file%line(i),getcol)
+        if (debug) write (*,*) trim(strcolumn)
+        read (strcolumn,*,iostat=io) dum
+        if (io == 0) columns(getcol,k) = dum
+        if (debug) write (*,*) dum
+           
+      end do
+    enddo
+    deallocate(strcolumn)
+
+    if(present(cols))then
+       cols = ncol
+    endif
+    if(present(rows))then
+       rows = nrow-1  !> not counting the header lines
+    endif
+
+    call file%close()
+  end subroutine parse_csv_allcolumns_real
+
 !========================================================================================!
 
   subroutine parse_csv_columnname_int(fname,header,column)
diff --git a/src/parsing/parse_maindata.f90 b/src/parsing/parse_maindata.f90
index 91d99ea1..7cc73a9f 100644
--- a/src/parsing/parse_maindata.f90
+++ b/src/parsing/parse_maindata.f90
@@ -273,6 +273,7 @@ subroutine parse_confsolv(env,blk)
     integer :: i
 !>--- add ConfSolv as refinement level to give a ΔΔGsoln
     call env%addrefine(refine%ConfSolv)
+    env%refine_presort = .true.
 
 !>--- parse the arguments
     do i = 1,blk%nkv
@@ -291,9 +292,13 @@ subroutine parse_confsolv(env,blk)
         if (kv%na == 2) then
           cs_solvent = trim(kv%value_rawa(1))
           cs_smiles = trim(kv%value_rawa(2))
+        else if(index(kv%value_c,'.csv').ne.0)then
+          cs_solvfile = kv%value_c
         else
           cs_solvent = kv%value_c
         end if
+      case('solvent_csv','solvfile')
+        cs_solvfile = kv%value_c
       case ('solvent_name')
         cs_solvent = kv%value_c
       case ('solvent_smiles')
diff --git a/src/propcalc.f90 b/src/propcalc.f90
index b313b7ca..76f045c4 100644
--- a/src/propcalc.f90
+++ b/src/propcalc.f90
@@ -51,6 +51,7 @@ subroutine propcalc(iname,imode,env,tim)
   use iomod
   use strucrd,only:rdensembleparam,rdensemble,wrxyz
   use utilities,only:boltz2
+  use cregen_interface
   implicit none
 
   type(systemdata) :: env
diff --git a/src/qcg/solvtool.f90 b/src/qcg/solvtool.f90
index 318ebe78..63c6040f 100644
--- a/src/qcg/solvtool.f90
+++ b/src/qcg/solvtool.f90
@@ -847,6 +847,7 @@ subroutine qcg_ensemble(env, solu, solv, clus, ens, tim, fname_results)
    use zdata
    use strucrd
    use utilities
+   use cregen_interface
    implicit none
 
    type(systemdata)           :: env