@@ -23,6 +23,7 @@ using Avogadro::Core::Bond;
23
23
using Avogadro::Core::Elements;
24
24
using Avogadro::Core::lexicalCast;
25
25
using Avogadro::Core::Molecule;
26
+ using Avogadro::Core::split;
26
27
using Avogadro::Core::startsWith;
27
28
using Avogadro::Core::trimmed;
28
29
@@ -92,8 +93,10 @@ bool MdlFormat::read(std::istream& in, Core::Molecule& mol)
92
93
return false ;
93
94
}
94
95
string mdlVersion (trimmed (buffer.substr (33 )));
95
- if (mdlVersion != " V2000" ) {
96
- appendError (" Unsupported file format version encountered: " + mdlVersion);
96
+ if (mdlVersion == " V3000" )
97
+ return readV3000 (in, mol);
98
+ else if (mdlVersion != " V2000" ) {
99
+ appendError (" Unsupported MDL version: " + mdlVersion);
97
100
return false ;
98
101
}
99
102
@@ -241,8 +244,8 @@ bool MdlFormat::read(std::istream& in, Core::Molecule& mol)
241
244
dataValue += buffer;
242
245
}
243
246
} else if (startsWith (buffer, " > <" )) {
244
- // This is a data header, read the name of the entry, and the value on the
245
- // following lines.
247
+ // This is a data header, read the name of the entry, and the value on
248
+ // the following lines.
246
249
dataName = trimmed (buffer).substr (3 , buffer.length () - 4 );
247
250
inValue = true ;
248
251
}
@@ -251,11 +254,207 @@ bool MdlFormat::read(std::istream& in, Core::Molecule& mol)
251
254
return true ;
252
255
}
253
256
257
+ bool MdlFormat::readV3000 (std::istream& in, Core::Molecule& mol)
258
+ {
259
+ string buffer;
260
+ // we should have M V30 BEGIN CTAB
261
+ getline (in, buffer);
262
+ if (trimmed (buffer) != " M V30 BEGIN CTAB" ) {
263
+ appendError (" Error parsing V3000 file, expected 'M V30 BEGIN CTAB'." );
264
+ return false ;
265
+ }
266
+ // now we should get the counts line
267
+ // e.g. 'M V30 COUNTS 23694 24297 0 0 1'
268
+ getline (in, buffer);
269
+ // split by whitespace
270
+ std::vector<string> counts = split (trimmed (buffer), ' ' );
271
+ if (counts.size () < 5 ) {
272
+ appendError (" Error parsing V3000 counts line." );
273
+ return false ;
274
+ }
275
+ bool ok (false );
276
+ int numAtoms (lexicalCast<int >(counts[3 ], ok));
277
+ if (!ok) {
278
+ appendError (" Error parsing number of atoms." );
279
+ return false ;
280
+ }
281
+ int numBonds (lexicalCast<int >(counts[4 ], ok));
282
+ if (!ok) {
283
+ appendError (" Error parsing number of bonds." );
284
+ return false ;
285
+ }
286
+
287
+ // Parse the atom block.
288
+ // 'M V30 BEGIN ATOM'
289
+ // 'M V30 1 N 171.646 251.874 224.877 0'
290
+ getline (in, buffer);
291
+ if (trimmed (buffer) != " M V30 BEGIN ATOM" ) {
292
+ appendError (" Error parsing V3000 atom block." );
293
+ return false ;
294
+ }
295
+ for (int i = 0 ; i < numAtoms; ++i) {
296
+ getline (in, buffer);
297
+ std::vector<string> atomData = split (trimmed (buffer), ' ' );
298
+ if (atomData.size () < 7 ) {
299
+ appendError (" Error parsing V3000 atom line." );
300
+ return false ;
301
+ }
302
+
303
+ string element (trimmed (atomData[3 ]));
304
+ unsigned char atomicNum = Elements::atomicNumberFromSymbol (element);
305
+ Atom newAtom = mol.addAtom (atomicNum);
306
+
307
+ Vector3 pos;
308
+ pos.x () = lexicalCast<Real>(atomData[4 ], ok);
309
+ if (!ok) {
310
+ appendError (" Failed to parse x coordinate: " + atomData[3 ]);
311
+ return false ;
312
+ }
313
+ pos.y () = lexicalCast<Real>(atomData[5 ], ok);
314
+ if (!ok) {
315
+ appendError (" Failed to parse y coordinate: " + atomData[4 ]);
316
+ return false ;
317
+ }
318
+ pos.z () = lexicalCast<Real>(atomData[6 ], ok);
319
+ if (!ok) {
320
+ appendError (" Failed to parse z coordinate: " + atomData[5 ]);
321
+ return false ;
322
+ }
323
+ newAtom.setPosition3d (pos);
324
+ // check for formal charge in the atom block
325
+ // CHG=1 for example
326
+ if (atomData.size () > 8 ) {
327
+ string chargeData = atomData[8 ];
328
+ if (startsWith (chargeData, " CHG=" )) {
329
+ int charge = lexicalCast<int >(chargeData.substr (4 ), ok);
330
+ if (!ok) {
331
+ appendError (" Failed to parse atom charge: " + chargeData);
332
+ return false ;
333
+ }
334
+ newAtom.setFormalCharge (charge);
335
+ }
336
+ }
337
+ } // end of atom block
338
+ getline (in, buffer);
339
+ // check for END ATOM
340
+ if (trimmed (buffer) != " M V30 END ATOM" ) {
341
+ appendError (" Error parsing V3000 atom block." );
342
+ return false ;
343
+ }
344
+
345
+ // bond block
346
+ // 'M V30 BEGIN BOND'
347
+ // 'M V30 1 1 1 2'
348
+ getline (in, buffer);
349
+ if (trimmed (buffer) != " M V30 BEGIN BOND" ) {
350
+ appendError (" Error parsing V3000 bond block." );
351
+ return false ;
352
+ }
353
+ for (int i = 0 ; i < numBonds; ++i) {
354
+ getline (in, buffer);
355
+ std::vector<string> bondData = split (trimmed (buffer), ' ' );
356
+ if (bondData.size () < 5 ) {
357
+ appendError (" Error parsing V3000 bond line." );
358
+ return false ;
359
+ }
360
+ int order = lexicalCast<int >(bondData[3 ], ok);
361
+ if (!ok) {
362
+ appendError (" Failed to parse bond order: " + bondData[3 ]);
363
+ return false ;
364
+ }
365
+ int atom1 = lexicalCast<int >(bondData[4 ], ok) - 1 ;
366
+ if (!ok) {
367
+ appendError (" Failed to parse bond atom1: " + bondData[4 ]);
368
+ return false ;
369
+ }
370
+ int atom2 = lexicalCast<int >(bondData[5 ], ok) - 1 ;
371
+ if (!ok) {
372
+ appendError (" Failed to parse bond atom2: " + bondData[5 ]);
373
+ return false ;
374
+ }
375
+ mol.addBond (mol.atom (atom1), mol.atom (atom2),
376
+ static_cast <unsigned char >(order));
377
+ } // end of bond block
378
+
379
+ // look for M END
380
+ while (getline (in, buffer)) {
381
+ if (trimmed (buffer) == " M END" )
382
+ break ;
383
+ }
384
+ // read in any properties
385
+ while (getline (in, buffer)) {
386
+ if (startsWith (buffer, " > <" )) {
387
+ string key = trimmed (buffer.substr (3 , buffer.length () - 4 ));
388
+ string value;
389
+ while (getline (in, buffer)) {
390
+ if (trimmed (buffer) == " " )
391
+ break ;
392
+ value += buffer + " \n " ;
393
+ }
394
+ mol.setData (key, value);
395
+ }
396
+ }
397
+
398
+ return true ;
399
+ }
400
+
401
// Writes mol to out as a V3000 connection table and always returns true.
//
// Output order: a placeholder counts line tagged V3000, BEGIN CTAB, a COUNTS
// line carrying mol.atomCount() and mol.bondCount(), the atom block, the bond
// block, END CTAB and M END. When m_writeProperties is set, every entry of
// mol.dataMap() follows as a "> <key>" header, its value, and a blank line.
// A "$$$$" terminator is written when m_writeProperties is set or the format
// is in MultiMolecule mode.
//
// NOTE(review): no precision or float format is set in this function, so
// coordinates are written with whatever formatting state out already has.
// NOTE(review): whitespace inside the string literals below may not be shown
// faithfully in this view; confirm the 'M V30' prefixes against the format
// specification before relying on them.
+ bool MdlFormat::writeV3000 (std::ostream& out, const Core::Molecule& mol)
402
+ {
403
+ // write the "fake" counts line
// All counts on this line are zero; the real ones go on the COUNTS line.
404
+ out << " 0 0 0 0 0 999 V3000\n " ;
405
+ out << " M V30 BEGIN CTAB\n " ;
406
+ out << " M V30 COUNTS " << mol.atomCount () << ' ' << mol.bondCount ()
407
+ << " 0 0 0\n " ;
408
+ // atom block
// One line per atom: 1-based index, element symbol, x, y, z, a literal 0,
// and a CHG= entry only when the formal charge is non-zero.
409
+ out << " M V30 BEGIN ATOM\n " ;
410
+ for (size_t i = 0 ; i < mol.atomCount (); ++i) {
411
+ Atom atom = mol.atom (i);
412
+ out << " M V30 " << i + 1 << ' ' << Elements::symbol (atom.atomicNumber ())
413
+ << ' ' << atom.position3d ().x () << ' ' << atom.position3d ().y () << ' '
414
+ << atom.position3d ().z () << " 0" ;
415
+ if (atom.formalCharge ())
416
+ out << " CHG=" << atom.formalCharge ();
417
+ out << " \n " ;
418
+ }
419
+ out << " M V30 END ATOM\n " ;
420
+ // bond block
// One line per bond: 1-based bond index, order written as an int, then the
// two atom indices converted from 0-based to 1-based.
421
+ out << " M V30 BEGIN BOND\n " ;
422
+ for (size_t i = 0 ; i < mol.bondCount (); ++i) {
423
+ Bond bond = mol.bond (i);
424
+ out << " M V30 " << i + 1 << ' ' << static_cast <int >(bond.order ()) << ' '
425
+ << (bond.atom1 ().index () + 1 ) << ' ' << (bond.atom2 ().index () + 1 )
426
+ << " \n " ;
427
+ }
428
+ out << " M V30 END BOND\n " ;
429
+ out << " M V30 END CTAB\n " ;
430
+ out << " M END\n " ;
431
+
432
+ // TODO: isotopes, radicals, etc.
// Optional data items: header line, value line, blank separator.
433
+ if (m_writeProperties) {
434
+ const auto dataMap = mol.dataMap ();
435
+ for (const auto & key : dataMap.names ()) {
436
+ out << " > <" << key << " >\n " ;
437
+ out << dataMap.value (key).toString () << " \n " ;
438
+ out << " \n " ; // empty line between data blocks
439
+ }
440
+ }
441
+
// Record terminator, emitted whenever properties were requested or the
// format is in MultiMolecule mode.
442
+ if (m_writeProperties || isMode (FileFormat::MultiMolecule))
443
+ out << " $$$$\n " ;
444
+
445
+ return true ;
446
+ }
447
+
254
448
bool MdlFormat::write (std::ostream& out, const Core::Molecule& mol)
255
449
{
256
450
// Header lines.
257
451
out << mol.data (" name" ).toString () << " \n Avogadro\n\n " ;
258
452
// Counts line.
453
+ if (mol.atomCount () > 999 || mol.bondCount () > 999 ) {
454
+ // we need V3000 support for big molecules
455
+ return writeV3000 (out, mol);
456
+ }
457
+
259
458
out << setw (3 ) << std::right << mol.atomCount () << setw (3 ) << mol.bondCount ()
260
459
<< " 0 0 0 0 0 0 0 0999 V2000\n " ;
261
460
// Atom block.
@@ -269,7 +468,7 @@ bool MdlFormat::write(std::ostream& out, const Core::Molecule& mol)
269
468
: ((charge <= 3 ) ? charge : 0 );
270
469
out << setw (10 ) << std::right << std::fixed << setprecision (4 )
271
470
<< atom.position3d ().x () << setw (10 ) << atom.position3d ().y ()
272
- << setw (10 ) << atom.position3d ().z () << " " << setw (3 ) << std::left
471
+ << setw (10 ) << atom.position3d ().z () << ' ' << setw (3 ) << std::left
273
472
<< Elements::symbol (atom.atomicNumber ()) << " 0" << setw (3 )
274
473
<< std::right << chargeField /* for compatibility */
275
474
<< " 0 0 0 0 0 0 0 0 0 0\n " ;
@@ -286,7 +485,7 @@ bool MdlFormat::write(std::ostream& out, const Core::Molecule& mol)
286
485
for (auto & i : chargeList) {
287
486
Index atomIndex = i.first ;
288
487
signed int atomCharge = i.second ;
289
- out << " M CHG 1 " << setw (3 ) << std::right << atomIndex + 1 << " "
488
+ out << " M CHG 1 " << setw (3 ) << std::right << atomIndex + 1 << ' '
290
489
<< setw (3 ) << atomCharge << " \n " ;
291
490
}
292
491
// TODO: isotopes, etc.
@@ -301,7 +500,7 @@ bool MdlFormat::write(std::ostream& out, const Core::Molecule& mol)
301
500
}
302
501
}
303
502
304
- if (isMode (FileFormat::MultiMolecule))
503
+ if (m_writeProperties || isMode (FileFormat::MultiMolecule))
305
504
out << " $$$$\n " ;
306
505
307
506
return true ;
0 commit comments