Skip to content

Commit

Permalink
ddl.cpp: tweak for regexes
Browse files (browse the repository at this point in the history)
  • Loading branch information
wojdyr committed Jan 9, 2025
1 parent 832b039 commit e72d7b8
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 12 deletions.
6 changes: 3 additions & 3 deletions src/ddl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -475,9 +475,9 @@ void Ddl::read_ddl2_block(cif::Block& block) {
// mmcif_pdbx_v50.dic uses custom flavour of regex:
// character classes have unescaped \, but recognize \n, \t, etc.
// Here is a quick fix:
std::string::size_type pos = re_str.find("/\\{}");
if (pos != std::string::npos)
re_str.replace(pos, 4, "/\\\\{}");
gemmi::replace_all(re_str, "/\\{}", "/\\\\{}");
// in binary, \<newline> is apparently meant to be ignored
gemmi::replace_all(re_str, "\\\n", "");
auto flag = std::regex::awk | std::regex::optimize;
regexes_.emplace(row.str(0), std::regex(re_str, flag));
} catch (const std::regex_error& e) {
Expand Down
6 changes: 3 additions & 3 deletions tests/mmcif_pdbx_v50_frag.dic
Original file line number Diff line number Diff line change
Expand Up @@ -540,8 +540,8 @@ data_mmcif_pdbx.dic
'[A-Za-z0-9]+(,[A-Za-z0-9]+)*'
; A list of comma separated chain or asym ids.
;
3x4_matrices char
'((([ \t]*-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? +){3}?(-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? *\n([\t ]*\n)*)){3})*(([ \t]*-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? +){3}?(-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? *\n([\t ]*\n)*)){2}(([ \t]*-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? +){3}(-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? *\n?([\t ]*\n)*))[ \t]*'

3x4_matrices char "(((([ \t]*-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? +){3})?(-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? *\n([\t ]*\n)*)){3})*((([ \t]*-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? +){3})?(-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? *\n([\t ]*\n)*)){2}((([ \t]*-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? +){3})(-?(([0-9]+)[.]?|([0-9]*[.][0-9]+))([(][0-9]+[)])?([eE][+-]?[0-9]+)? *\n?([\t ]*\n)*))[ \t]*"
; A set of 3x4 matrices separated by spaces and newlines on each line.
Final newline optional. Optional spaces at start of lines. Blank lines accepted.
;
Expand All @@ -556,7 +556,7 @@ data_mmcif_pdbx.dic
author char "[A-Za-z0-9_]+(( |-|'|\. )[A-Za-z0-9_]+)*( Jr.| III)?, [A-Za-z0-9_]\.(-?[A-Za-z0-9_]+\.)*$" 'Author name in PDB format: Taylor, C.A.'
orcid_id char "[0-9]{4}-[0-9]{4}-[0-9]{4}-([0-9]{3}X|[0-9]{4})" 'ORCID pattern - dddd-dddd-dddd-dddd|dddX'
symmetry_operation char '[-+0-9XxYyZ/ ]+,[-+0-9XxYyZ/ ]+,[-+0-9XxYyZ/ ]+' 'Allowed characters for use in symmetry operation such as 1/2-x,y,1/2-z'
sequence_dep char '[a-zA-Z0-9\t \r\n\v\f\(\)]+$' 'Deposition specific one letter code'
sequence_dep char '[a-zA-Z0-9\t \r\n()]+$' 'Deposition specific one letter code'
date_dep char '([1-9][0-9](([02468][048])|([13579][26]))-02-29)|[1-9][0-9][0-9][0-9]-((((0[1-9])|(1[0-2]))-((0[1-9])|(1[0-9])|(2[0-8])))|((((0[13578])|(1[02]))-31)|(((0[1,3-9])|(1[0-2]))-(29|30))))' 'Deposition specific date with better checking'

#####################
Expand Down
7 changes: 1 addition & 6 deletions tests/test_cif.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,12 +391,7 @@ def test_validation(self):
msg_list = []
ddl = cif.Ddl(logger=(lambda msg: msg_list.append(msg), 6))
ddl.read_ddl(cif.read(full_path('mmcif_pdbx_v50_frag.dic')))
self.assertEqual(len(msg_list), 1,
msg=f'Messages:\n {"\n ".join(msg_list)}')
for msg in msg_list:
expected = "Bad DDL2: can't parse regex for 'binary':"
self.assertTrue(msg.startswith(expected), msg=msg)
msg_list = []
self.assertEqual(msg_list, [])
ddl.validate_cif(doc)
self.assertEqual(msg_list,
['[dummy_block] unknown tag _custom_tag',
Expand Down

0 comments on commit e72d7b8

Please sign in to comment.