From b4e94d5272f1d8a9c30cd2b20aab1d43364d7a10 Mon Sep 17 00:00:00 2001
From: Thomas Holder <thomas.holder@rentschler.de>
Date: Tue, 19 Dec 2023 20:14:10 +0100
Subject: [PATCH] Upgrades for Python 3.12

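- Replace `imp.load_source` (the `imp` module was removed in Python 3.12)
- Fix invalid escape sequences (W605)
- Compare integers with `==`/`!=` instead of `is`/`is not`
- Replace the removed `assertNotEquals` alias with `assertNotEqual`
- Derive `InvalidNamespaceError` from `ImportError` only
- Compare projected trajectories only when their array shapes match
- Require Python 3.6+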
---
 README.rst                                    |  5 ++---
 csb/bio/io/hhpred.py                          |  6 +++---
 csb/bio/io/procheck.py                        | 20 +++++++++----------
 csb/bio/io/whatif.py                          | 10 +++++-----
 csb/bio/io/wwpdb.py                           |  2 +-
 csb/bio/sequence/__init__.py                  |  2 +-
 csb/bio/structure/__init__.py                 |  4 ++--
 csb/build.py                                  | 20 ++++++++++++-------
 csb/statistics/__init__.py                    |  2 +-
 csb/test/__init__.py                          | 10 +++++++---
 csb/test/cases/core/__init__.py               |  5 +++--
 csb/test/cases/io/__init__.py                 |  2 +-
 .../cases/statistics/samplers/__init__.py     |  8 +++++++-
 setup.py                                      | 15 ++++++--------
 14 files changed, 62 insertions(+), 49 deletions(-)

diff --git a/README.rst b/README.rst
index 7d27bce..bc34850 100644
--- a/README.rst
+++ b/README.rst
@@ -19,10 +19,9 @@ package consists of a few major components:
 
 Installation 
 ------------
-CSB is being developed on Linux with Python 2.7 and 3.6. However, compatibility
+CSB is being developed on Linux. However, compatibility
 is a design goal and the package works on any platform, on any modern Python
-interpreter since version 2.6 (that includes python 3 support out of
-the box). If you find any issues on a platform/interpreter different from
+interpreter. If you find any issues on a platform/interpreter different from
 our development environment, please let us know.
 
 CSB and all of its dependencies can be installed with pip::
diff --git a/csb/bio/io/hhpred.py b/csb/bio/io/hhpred.py
index 044bd28..b7d0102 100644
--- a/csb/bio/io/hhpred.py
+++ b/csb/bio/io/hhpred.py
@@ -207,7 +207,7 @@ def _parse_properties(self, hmm):
                 hmm.family = line[6:].strip()
 
             elif line.startswith('LENG '):
-                m = re.search('(\d+)\D+(\d+)', line).groups()
+                m = re.search(r'(\d+)\D+(\d+)', line).groups()
                 m = tuple(map(int, m))
                 hmm.length = ProfileLength(m[0], m[1])
 
@@ -257,7 +257,7 @@ def _parse_sequences(self, hmm):
             if header_token in ['>ss_dssp', '>sa_dssp', '>ss_pred', '>ss_conf', '>Consens']:
 
                 lines = entry.strip().splitlines()
-                seq = re.sub('\s+', '', ''.join(lines[1:]))
+                seq = re.sub(r'\s+', '', ''.join(lines[1:]))
 
                 if header_token == '>ss_dssp':
                     hmm.dssp = structure.SecondaryStructure(seq)
@@ -304,7 +304,7 @@ def _parse_profile(self, hmm, units=ScoreUnits.LogScales):
         start_probs = None
 
         lines = iter(self._profile)
-        pattern = re.compile('^[A-Z\-]\s[0-9]+\s+')
+        pattern = re.compile(r'^[A-Z\-]\s[0-9]+\s+')
 
         if units == ScoreUnits.LogScales:
 
diff --git a/csb/bio/io/procheck.py b/csb/bio/io/procheck.py
index 46f8148..941cc81 100644
--- a/csb/bio/io/procheck.py
+++ b/csb/bio/io/procheck.py
@@ -28,16 +28,16 @@ def parse(self, fn):
         f_handler = open(os.path.expanduser(fn))
         text = f_handler.read()
         
-        input_file_name = re.compile('>>>-----.*?\n.*?\n\s*\|\s*(\S+)\s+')
-        residues = re.compile('(\d+)\s*residues\s\|')
-        ramachandran_plot = re.compile('Ramachandran\splot:\s*(\d+\.\d+)' + 
-                                      '%\s*core\s*(\d+\.\d+)%\s*allow\s*(\d+\.\d+)' + 
-                                      '%\s*gener\s*(\d+\.\d+)%\s*disall')
-        labelled_all = re.compile('Ramachandrans:\s*(\d+)\s*.*?out\sof\s*(\d+)')
-        labelled_chi = re.compile('Chi1-chi2\splots:\s*(\d+)\s*.*?out\sof\s*(\d+)')
-        bad_contacts = re.compile('Bad\scontacts:\s*(\d+)')
-        g_factors = re.compile('G-factors\s*Dihedrals:\s*([0-9-+.]+)' + 
-                              '\s*Covalent:\s*([0-9-+.]+)\s*Overall:\s*([0-9-+.]+)')
+        input_file_name = re.compile('>>>-----.*?\n.*?\n' r'\s*\|\s*(\S+)\s+')
+        residues = re.compile(r'(\d+)\s*residues\s\|')
+        ramachandran_plot = re.compile(r'Ramachandran\splot:\s*(\d+\.\d+)' + 
+                                      r'%\s*core\s*(\d+\.\d+)%\s*allow\s*(\d+\.\d+)' + 
+                                      r'%\s*gener\s*(\d+\.\d+)%\s*disall')
+        labelled_all = re.compile(r'Ramachandrans:\s*(\d+)\s*.*?out\sof\s*(\d+)')
+        labelled_chi = re.compile(r'Chi1-chi2\splots:\s*(\d+)\s*.*?out\sof\s*(\d+)')
+        bad_contacts = re.compile(r'Bad\scontacts:\s*(\d+)')
+        g_factors = re.compile(r'G-factors\s*Dihedrals:\s*([0-9-+.]+)' + 
+                              r'\s*Covalent:\s*([0-9-+.]+)\s*Overall:\s*([0-9-+.]+)')
 
         info['input_file'] = input_file_name.search(text).groups()[0]
         info['#residues'] = int(residues.search(text).groups()[0])
diff --git a/csb/bio/io/whatif.py b/csb/bio/io/whatif.py
index 71d6c5d..2063486 100644
--- a/csb/bio/io/whatif.py
+++ b/csb/bio/io/whatif.py
@@ -29,11 +29,11 @@ def parse_summary(self, fn):
         text = f_handler.read()
 
         info = dict()
-        re_ramachandran = re.compile('Ramachandran\s*Z-score\s*:\s*([0-9.Ee-]+)')
-        re_1st = re.compile('1st\s*generation\s*packing\s*quality\s*:\s*([0-9.Ee-]+)')
-        re_2nd = re.compile('2nd\s*generation\s*packing\s*quality\s*:\s*([0-9.Ee-]+)')
-        re_backbone = re.compile('Backbone\s*conformation\s*Z-score\s*:\s*([0-9.Ee-]+)')
-        re_rotamer = re.compile('chi-1\S*chi-2\s*rotamer\s*normality\s*:\s*([0-9.Ee-]+)')
+        re_ramachandran = re.compile(r'Ramachandran\s*Z-score\s*:\s*([0-9.Ee-]+)')
+        re_1st = re.compile(r'1st\s*generation\s*packing\s*quality\s*:\s*([0-9.Ee-]+)')
+        re_2nd = re.compile(r'2nd\s*generation\s*packing\s*quality\s*:\s*([0-9.Ee-]+)')
+        re_backbone = re.compile(r'Backbone\s*conformation\s*Z-score\s*:\s*([0-9.Ee-]+)')
+        re_rotamer = re.compile(r'chi-1\S*chi-2\s*rotamer\s*normality\s*:\s*([0-9.Ee-]+)')
         
 
         info['rama_z_score'] = float(re_ramachandran.search(text).groups(0)[0])
diff --git a/csb/bio/io/wwpdb.py b/csb/bio/io/wwpdb.py
index 35750fe..f128e51 100644
--- a/csb/bio/io/wwpdb.py
+++ b/csb/bio/io/wwpdb.py
@@ -1219,7 +1219,7 @@ def _read_resolution(self, line):
         @return: resolution
         @rtype: float or None
         """
-        res = re.search("(\d+(?:\.\d+)?)\s+ANGSTROM", line)
+        res = re.search(r"(\d+(?:\.\d+)?)\s+ANGSTROM", line)
         
         if res and res.groups():
             return float(res.group(1))
diff --git a/csb/bio/sequence/__init__.py b/csb/bio/sequence/__init__.py
index 70b6313..1cee2c6 100644
--- a/csb/bio/sequence/__init__.py
+++ b/csb/bio/sequence/__init__.py
@@ -563,7 +563,7 @@ def __init__(self, id, header, residues, type=SequenceTypes.Unknown):
     
     def _append(self, string):
         # this seems to be the fastest method for sanitization and storage        
-        self._residues += re.sub('([^\w\-\.])+', '', string)
+        self._residues += re.sub(r'([^\w\-\.])+', '', string)
         
     def _add(self, char):
         self._append(char)
diff --git a/csb/bio/structure/__init__.py b/csb/bio/structure/__init__.py
index 8a3ac39..b0c2b97 100644
--- a/csb/bio/structure/__init__.py
+++ b/csb/bio/structure/__init__.py
@@ -2126,9 +2126,9 @@ def parse(string, conf_string=None):
         if not isinstance(string, csb.core.string):
             raise TypeError(string)
                 
-        string = ''.join(re.split('\s+', string))
+        string = ''.join(re.split(r'\s+', string))
         if conf_string is not None:
-            conf_string = ''.join(re.split('\s+', conf_string))
+            conf_string = ''.join(re.split(r'\s+', conf_string))
             if not len(string) == len(conf_string):
                 raise ValueError('The confidence string has unexpected length.')
         motifs = [ ]
diff --git a/csb/build.py b/csb/build.py
index 95b9893..bd62503 100644
--- a/csb/build.py
+++ b/csb/build.py
@@ -22,7 +22,6 @@
        source trees.
 @see: [CSB 0000038]
 """
-from __future__ import print_function
 
 import os
 import sys
@@ -55,7 +54,7 @@
 """
 It is now safe to import any modules  
 """
-import imp
+import importlib.util  # plain `import importlib` does not guarantee the `util` submodule is loaded
 import shutil
 import tarfile
 
@@ -65,6 +64,13 @@
 from csb.io import Shell
 
 
+def _load_source(name, pathname):  # load the file at `pathname` as a module called `name`
+    module_spec = importlib.util.spec_from_file_location(name, pathname)
+    loaded = importlib.util.module_from_spec(module_spec)
+    module_spec.loader.exec_module(loaded)  # executes the file's top-level code
+    return loaded
+
+
 class BuildTypes(object):
     """
     Enumeration of build types.
@@ -288,7 +294,7 @@ def _doc(self, version):
             epydoc.cli.cli()
             sys.exit(0)
         except SystemExit as ex:
-            if ex.code is 0:
+            if ex.code == 0:
                 self.log('\n  Passed all doc tests')
             else:
                 if ex.code == 2:
@@ -333,7 +339,7 @@ def _package(self):
         version = package = None
 
         try:       
-            setup = imp.load_source('setupcsb', 'setup.py')
+            setup = _load_source('setupcsb', 'setup.py')
             d = setup.build()
             version = setup.VERSION
             package = d.dist_files[0][2]
@@ -342,7 +348,7 @@ def _package(self):
                 self._strip_source(package)
             
         except SystemExit as ex:
-            if ex.code is not 0:
+            if ex.code != 0:
                 self.log('\n  FAIL: Setup returned: \n\n{0}\n'.format(ex))
                 self._success = False
                 package = 'FAIL'
@@ -474,7 +480,7 @@ def __init__(self, path, sc):
             self._path = path
         else:
             raise IOError('Path not found: {0}'.format(path))
-        if Shell.run([sc, 'help']).code is 0:
+        if Shell.run([sc, 'help']).code == 0:
             self._sc = sc
         else:
             raise RevisionError('Source control binary probe failed', None, None)
@@ -518,7 +524,7 @@ def write(self, revision, sourcefile):
                     src.write(line)
 
         self._delcache(sourcefile)
-        return imp.load_source('____source', sourcefile).__version__      
+        return _load_source('____source', sourcefile).__version__
     
     def _run(self, cmd):
         
diff --git a/csb/statistics/__init__.py b/csb/statistics/__init__.py
index 54cfec1..67b9776 100644
--- a/csb/statistics/__init__.py
+++ b/csb/statistics/__init__.py
@@ -338,7 +338,7 @@ def running_average(x, w, axis=None):
     return array([mean(x[i:i + w], axis) for i in range(len(x) - w)])
 
 def weighted_median(x, w):
-    """
+    r"""
     Calculates the weighted median, that is the minimizer of
     argmin {\sum w_i |x_i - \mu|}
 
diff --git a/csb/test/__init__.py b/csb/test/__init__.py
index 96503a9..a12a5b3 100644
--- a/csb/test/__init__.py
+++ b/csb/test/__init__.py
@@ -159,7 +159,7 @@ def testSomeFeature(self)
 """
 import os
 import sys
-import imp
+import importlib.util  # plain `import importlib` does not guarantee the `util` submodule is loaded
 import types
 import time
 import tempfile
@@ -443,7 +443,7 @@ def execute(cls):
                 
         return runner.run(suite)            
 
-class InvalidNamespaceError(NameError, ImportError):
+class InvalidNamespaceError(ImportError):  # NOTE(review): was also a NameError; `except NameError` no longer catches it - confirm callers
     pass
     
 class AbstractTestBuilder(object):
@@ -567,7 +567,11 @@ def _loadSource(self, path):
         name = os.path.splitext(os.path.abspath(path))[0]
         name = name.replace('.', '-').rstrip('__init__').strip(os.path.sep)
         
-        return imp.load_source(name, path)        
+        spec = importlib.util.spec_from_file_location(name, path)
+        module = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(module)
+
+        return module
     
     def _recurse(self, obj):
         """
diff --git a/csb/test/cases/core/__init__.py b/csb/test/cases/core/__init__.py
index 8eaebb7..5eedf10 100644
--- a/csb/test/cases/core/__init__.py
+++ b/csb/test/cases/core/__init__.py
@@ -18,7 +18,7 @@ def runTest(self):
         copy = utils.deepcopy(obj, recursion=(rec + 1))
         
         self.assertEqual(obj, copy)
-        self.assertNotEquals(id(obj), id(copy))
+        self.assertNotEqual(id(obj), id(copy))
 
 @test.unit
 class TestIterable(test.Case):
@@ -70,7 +70,8 @@ def test():
     def testComparison(self):
         self.assertEqual(self.enum.A, 0)
         self.assertEqual(self.enum.C, 66)
-        self.assertFalse(self.enum.C is 66)
+        int_66 = 66
+        self.assertFalse(self.enum.C is int_66)
         self.assertFalse(isinstance(self.enum.A, int))
         
     def testStr(self):
diff --git a/csb/test/cases/io/__init__.py b/csb/test/cases/io/__init__.py
index e303a70..e016716 100644
--- a/csb/test/cases/io/__init__.py
+++ b/csb/test/cases/io/__init__.py
@@ -479,7 +479,7 @@ def setUp(self):
                         "Although that way may not be" + \
                         "obvious at first" + \
                         "unless you're Dutch.",
-                        "([0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])"]
+                        r"([0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])"]
 
         # Completly connnected graph
         
diff --git a/csb/test/cases/statistics/samplers/__init__.py b/csb/test/cases/statistics/samplers/__init__.py
index 598862b..db036ab 100644
--- a/csb/test/cases/statistics/samplers/__init__.py
+++ b/csb/test/cases/statistics/samplers/__init__.py
@@ -801,6 +801,12 @@ def clone(self):
         return s
 
 
+def _arrays_equal(a1, a2) -> bool:  # True only if both arrays have the same shape and equal elements
+    if a1.shape != a2.shape:  # skip the elementwise comparison (and broadcasting) on mismatched shapes
+        return False
+    return bool((a1 == a2).all())  # cast numpy.bool_ so the result matches the annotated builtin bool
+
+
 @test.functional
 class TestReplicaHistory(test.Case):
 
@@ -863,7 +869,7 @@ def _assertIdenticalProjTrajs(self, samples, interval, first_swap=None):
         ok = []
         for i in range(len(samples[0])):
             trajs2 = rh.calculate_projected_trajectories(i)
-            ok.append(True in [np.all(np.array(t1) == np.array(t2)) for t1 in trajs1
+            ok.append(True in [_arrays_equal(np.array(t1), np.array(t2)) for t1 in trajs1
                                for t2 in trajs2])
             
         return np.all(ok)
diff --git a/setup.py b/setup.py
index 7441e09..99c1f2a 100644
--- a/setup.py
+++ b/setup.py
@@ -21,9 +21,6 @@
 REQUIREMENTS = open("requirements.txt", encoding="utf-8").readlines()
 DEV_REQUIREMENTS = []
 
-if sys.version_info[0] == 2:
-    DEV_REQUIREMENTS.append("epydoc")
-
 v = {}
 exec(open(ROOT + "/__init__.py", encoding="utf-8").read(), v)
 VERSION = v["Version"]()
@@ -69,13 +66,13 @@ def build():
             'License :: OSI Approved :: MIT License',
             'Operating System :: OS Independent',
             'Programming Language :: Python',
-            'Programming Language :: Python :: 2.7',
-            'Programming Language :: Python :: 3.1',
-            'Programming Language :: Python :: 3.2',
-            'Programming Language :: Python :: 3.3',
-            'Programming Language :: Python :: 3.4',
-            'Programming Language :: Python :: 3.5',
             'Programming Language :: Python :: 3.6',
+            'Programming Language :: Python :: 3.7',
+            'Programming Language :: Python :: 3.8',
+            'Programming Language :: Python :: 3.9',
+            'Programming Language :: Python :: 3.10',
+            'Programming Language :: Python :: 3.11',
+            'Programming Language :: Python :: 3.12',
             'Topic :: Scientific/Engineering',
             'Topic :: Scientific/Engineering :: Bio-Informatics',
             'Topic :: Scientific/Engineering :: Mathematics',