Skip to content

Commit 674fbb0

Browse files
fix bug in reading abacus stru (#1714)
Fix the bug induced by the update of dpdata. Now the reading and writing of ABACUS STRU is by a more general dpdata/abacus interface, and remove some redundant codes. This commit is based on dpdata commit deepmodeling/dpdata#793, and it is valid after that commit is merged. related issue: #1711 <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit - **Refactor** - Streamlined the workflow for generating atomic structure data by removing redundant parameters and centralizing functionality. - Improved handling of atomic data structures for more efficient processing and organization. - **Tests** - Enhanced the precision of validations by refining comparisons to focus on key attributes, ensuring more robust result accuracy. - Corrected the method name for improved clarity in the test suite. - Added a new key to the test data structure for enhanced functionality in subsequent tests. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Co-authored-by: root <pxlxingliang> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 2158dba commit 674fbb0

File tree

7 files changed

+92
-184
lines changed

7 files changed

+92
-184
lines changed

dpgen/auto_test/lib/abacus.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import dpdata
66
import numpy as np
7-
from dpdata.abacus.scf import make_unlabeled_stru
7+
from dpdata.abacus.stru import make_unlabeled_stru
88
from dpdata.utils import uniq_atom_names
99
from dpdata.vasp import poscar as dpdata_poscar
1010

dpgen/data/gen.py

+1
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,7 @@ def stru_ele(supercell_stru, stru_out, eles, natoms, jdata, path_work):
145145
dpks_descriptor_name = os.path.basename(jdata["dpks_descriptor"])
146146
supercell_stru["atom_masses"] = jdata["atom_masses"]
147147
supercell_stru["atom_names"] = eles
148+
supercell_stru["atom_types"] = np.array(supercell_stru["types"])
148149
stru_text = make_abacus_scf_stru(
149150
supercell_stru,
150151
pp_file_names,

dpgen/data/tools/create_random_disturb.py

+1
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,7 @@ def create_disturbs_abacus_dev(
192192
stru = get_abacus_STRU(fin)
193193
natoms = sum(stru["atom_numbs"])
194194
cell0 = stru["cells"]
195+
stru["masses"] = stru["atom_masses"] # in dpdata, it is masses that should be used.
195196

196197
# creat nfile ofmt files.
197198
for fid in range(1, nfile + 1):

dpgen/generator/lib/abacus_scf.py

+72-180
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import re
44

55
import numpy as np
6-
from dpdata.abacus.scf import get_cell, get_coords, get_nele_from_stru
6+
from dpdata.abacus.stru import get_frame_from_stru, make_unlabeled_stru
77

88
from dpgen.auto_test.lib import vasp
99

@@ -208,90 +208,37 @@ def make_abacus_scf_stru(
208208
fp_pp_files,
209209
fp_orb_files=None,
210210
fp_dpks_descriptor=None,
211-
fp_params=None,
212211
type_map=None,
213212
pporb="", # pull all pp orb dpks files to pporb folder
214213
):
215-
atom_names = sys_data["atom_names"]
216-
atom_numbs = sys_data["atom_numbs"]
217-
if type_map is None:
218-
type_map = atom_names
219-
220-
assert len(atom_names) == len(atom_numbs), "Please check the name of atoms. "
221-
cell = sys_data["cells"].reshape([3, 3])
222-
coord = sys_data["coords"].reshape([sum(atom_numbs), 3])
223-
# volume = np.linalg.det(cell)
224-
# lattice_const = np.power(volume, 1/3)
225-
226-
ret = "ATOMIC_SPECIES\n"
227-
for iatom in range(len(atom_names)):
228-
assert atom_names[iatom] in type_map, (
229-
f"element {atom_names[iatom]} is not defined in type_map"
230-
)
231-
idx = type_map.index(atom_names[iatom])
232-
if "atom_masses" not in sys_data:
233-
ret += (
234-
atom_names[iatom]
235-
+ " 1.00 "
236-
+ os.path.join(pporb, fp_pp_files[idx])
237-
+ "\n"
238-
)
239-
else:
240-
ret += (
241-
atom_names[iatom]
242-
+ " {:.3f} ".format(sys_data["atom_masses"][iatom])
243-
+ os.path.join(pporb, fp_pp_files[idx])
244-
+ "\n"
245-
)
246-
247-
if fp_params is not None and "lattice_constant" in fp_params:
248-
ret += "\nLATTICE_CONSTANT\n"
249-
ret += (
250-
str(fp_params["lattice_constant"]) + "\n\n"
251-
) # in Bohr, in this way coord and cell are in Angstrom
252-
else:
253-
ret += "\nLATTICE_CONSTANT\n"
254-
ret += str(1 / bohr2ang) + "\n\n"
255-
256-
ret += "LATTICE_VECTORS\n"
257-
for ix in range(3):
258-
for iy in range(3):
259-
ret += str(cell[ix][iy]) + " "
260-
ret += "\n"
261-
ret += "\n"
262-
263-
ret += "ATOMIC_POSITIONS\n"
264-
ret += "Cartesian # Cartesian(Unit is LATTICE_CONSTANT)\n"
265-
# ret += "\n"
266-
natom_tot = 0
267-
for iele in range(len(atom_names)):
268-
ret += atom_names[iele] + "\n"
269-
ret += "0.0\n"
270-
ret += str(atom_numbs[iele]) + "\n"
271-
for iatom in range(atom_numbs[iele]):
272-
ret += "%.12f %.12f %.12f %d %d %d\n" % ( # noqa: UP031
273-
coord[natom_tot, 0],
274-
coord[natom_tot, 1],
275-
coord[natom_tot, 2],
276-
1,
277-
1,
278-
1,
279-
)
280-
natom_tot += 1
281-
assert natom_tot == sum(atom_numbs)
282-
214+
sys_data_copy = copy.deepcopy(sys_data)
215+
# re-construct the path of files by pporb + file name
216+
fp_pp_files = [os.path.join(pporb, i) for i in fp_pp_files]
283217
if fp_orb_files is not None:
284-
ret += "\nNUMERICAL_ORBITAL\n"
285-
assert len(fp_orb_files) == len(type_map)
286-
for iatom in range(len(atom_names)):
287-
idx = type_map.index(atom_names[iatom])
288-
ret += os.path.join(pporb, fp_orb_files[idx]) + "\n"
289-
218+
fp_orb_files = [os.path.join(pporb, i) for i in fp_orb_files]
290219
if fp_dpks_descriptor is not None:
291-
ret += "\nNUMERICAL_DESCRIPTOR\n"
292-
ret += os.path.join(pporb, fp_dpks_descriptor) + "\n"
220+
fp_dpks_descriptor = os.path.join(pporb, fp_dpks_descriptor)
221+
222+
# we need to make sure that the shape of cells and coords are the same
223+
# and if they are 2D, we need to convert them to 3D
224+
cells = np.array(sys_data["cells"])
225+
coords = np.array(sys_data["coords"])
226+
assert len(cells.shape) == len(coords.shape), (
227+
"cells and coords should have the same shape."
228+
)
293229

294-
return ret
230+
if len(cells.shape) == 2:
231+
sys_data_copy["cells"] = np.array([cells])
232+
sys_data_copy["coords"] = np.array([coords])
233+
c = make_unlabeled_stru(
234+
sys_data_copy,
235+
0,
236+
pp_file=fp_pp_files,
237+
numerical_orbital=fp_orb_files,
238+
numerical_descriptor=fp_dpks_descriptor,
239+
)
240+
241+
return c
295242

296243

297244
def get_abacus_input_parameters(INPUT):
@@ -306,106 +253,37 @@ def get_abacus_input_parameters(INPUT):
306253
return input_parameters
307254

308255

309-
def get_mass_from_STRU(geometry_inlines, atom_names):
310-
nele = get_nele_from_stru(geometry_inlines)
311-
assert nele > 0
312-
mass_list = [0 for i in atom_names]
313-
pp_file_list = [i for i in atom_names]
314-
for iline, line in enumerate(geometry_inlines):
315-
if line.split() == []:
316-
continue
317-
if "ATOMIC_SPECIES" == line.split()[0]:
318-
for iele1 in range(1, 1 + nele):
319-
for iele2 in range(nele):
320-
if geometry_inlines[iline + iele1].split()[0] == atom_names[iele2]:
321-
mass_list[iele2] = float(
322-
geometry_inlines[iline + iele1].split()[1]
323-
)
324-
pp_file_list[iele2] = geometry_inlines[iline + iele1].split()[2]
325-
for iele in range(len(mass_list)):
326-
assert mass_list[iele] > 0
327-
return mass_list, pp_file_list
328-
329-
330-
def get_natoms_from_stru(geometry_inlines):
331-
key_words_list = [
332-
"ATOMIC_SPECIES",
333-
"NUMERICAL_ORBITAL",
334-
"LATTICE_CONSTANT",
335-
"LATTICE_VECTORS",
336-
"ATOMIC_POSITIONS",
337-
"NUMERICAL_DESCRIPTOR",
338-
]
339-
keyword_sequence = []
340-
keyword_line_index = []
341-
atom_names = []
342-
atom_numbs = []
343-
tmp_line = []
344-
for i in geometry_inlines:
345-
if i.strip() != "":
346-
tmp_line.append(i)
347-
for iline, line in enumerate(tmp_line):
348-
if line.split() == []:
349-
continue
350-
have_key_word = False
351-
for keyword in key_words_list:
352-
if keyword in line and keyword == line.split()[0]:
353-
keyword_sequence.append(keyword)
354-
keyword_line_index.append(iline)
355-
assert len(keyword_line_index) == len(keyword_sequence)
356-
assert len(keyword_sequence) > 0
357-
keyword_line_index.append(len(tmp_line))
358-
for idx, keyword in enumerate(keyword_sequence):
359-
if keyword == "ATOMIC_POSITIONS":
360-
iline = keyword_line_index[idx] + 2
361-
while iline < keyword_line_index[idx + 1] - 1:
362-
atom_names.append(tmp_line[iline].split()[0])
363-
atom_numbs.append(int(tmp_line[iline + 2].split()[0]))
364-
iline += 3 + atom_numbs[-1]
365-
return atom_names, atom_numbs
366-
367-
368-
def get_additional_from_STRU(geometry_inlines, nele):
369-
dpks_descriptor_kw = "NUMERICAL_DESCRIPTOR"
370-
orb_file_kw = "NUMERICAL_ORBITAL"
371-
dpks_descriptor = None
372-
orb_file = None
373-
for iline in range(len(geometry_inlines)):
374-
if len(geometry_inlines[iline]) > 0:
375-
if orb_file_kw == geometry_inlines[iline].split()[0]:
376-
orb_file = []
377-
for iele in range(nele):
378-
orb_file.append(geometry_inlines[iline + iele + 1].strip())
379-
if dpks_descriptor_kw == geometry_inlines[iline].split()[0]:
380-
dpks_descriptor = geometry_inlines[iline + 1].strip()
381-
return orb_file, dpks_descriptor
382-
383-
384-
def get_abacus_STRU(STRU, INPUT=None, n_ele=None):
385-
# read in geometry from STRU file. n_ele is the number of elements.
386-
# Either n_ele or INPUT should be provided.
387-
with open(STRU) as fp:
388-
geometry_inlines = fp.read().split("\n")
389-
for iline, line in enumerate(geometry_inlines):
390-
if line.split() == [] or len(line) == 0:
391-
del geometry_inlines[iline]
392-
geometry_inlines.append("")
393-
celldm, cell = get_cell(geometry_inlines)
394-
atom_names, natoms, types, coords = get_coords(celldm, cell, geometry_inlines)
395-
masses, pp_files = get_mass_from_STRU(geometry_inlines, atom_names)
396-
orb_files, dpks_descriptor = get_additional_from_STRU(geometry_inlines, len(masses))
397-
data = {}
398-
data["atom_names"] = atom_names
399-
data["atom_numbs"] = natoms
400-
data["atom_types"] = types
401-
data["cells"] = cell
402-
data["coords"] = coords
403-
data["atom_masses"] = (
404-
masses # Notice that this key is not defined in dpdata system.
405-
)
406-
data["pp_files"] = pp_files
407-
data["orb_files"] = orb_files
408-
data["dpks_descriptor"] = dpks_descriptor
256+
def get_abacus_STRU(STRU):
257+
"""Read STRU file and return a dictionary containing the structure information.
258+
259+
Args:
260+
STRU (str): The path of STRU file.
261+
262+
Returns
263+
-------
264+
dict: A dictionary containing the structure information.
265+
{
266+
"atom_names": list of str,
267+
"atom_numbs": list of int,
268+
"atom_masses": list of float,
269+
"coords": np.ndarray,
270+
"cells": np.ndarray,
271+
"pp_files": list of str,
272+
"orb_files": list of str,
273+
"dpks_descriptor": str,
274+
}
275+
"""
276+
data = get_frame_from_stru(STRU)
277+
data["atom_masses"] = data.pop("masses")
278+
data["cells"] = data.pop("cells")[0]
279+
data["coords"] = data.pop("coords")[0]
280+
assert "pp_files" in data, "pp_files should be provided in STRU file."
281+
if None in data["pp_files"]:
282+
data["pp_files"] = None
283+
if "orb_files" not in data:
284+
data["orb_files"] = None
285+
if "dpks_descriptor" not in data:
286+
data["dpks_descriptor"] = None
409287
return data
410288

411289

@@ -419,7 +297,21 @@ def make_supercell_abacus(from_struct, super_cell):
419297
# )
420298
for idx_atm in from_struct["atom_types"]:
421299
new_types += [idx_atm] * super_cell[0] * super_cell[1] * super_cell[2]
422-
to_struct["atom_types"] = new_types
300+
to_struct["atom_types"] = np.array(new_types)
301+
302+
# expand move, spins
303+
for ikey in ["move", "spins"]:
304+
if ikey in from_struct:
305+
new_list = []
306+
for ia in range(sum(from_struct["atom_numbs"])):
307+
new_list += (
308+
[from_struct[ikey][0][ia]]
309+
* super_cell[0]
310+
* super_cell[1]
311+
* super_cell[2]
312+
)
313+
to_struct[ikey] = np.array([new_list])
314+
423315
to_atom_num = (
424316
sum(from_struct["atom_numbs"]) * super_cell[0] * super_cell[1] * super_cell[2]
425317
)

dpgen/generator/run.py

-1
Original file line numberDiff line numberDiff line change
@@ -3569,7 +3569,6 @@ def make_fp_abacus_scf(iter_index, jdata):
35693569
fp_pp_files,
35703570
fp_orb_files,
35713571
fp_dpks_descriptor,
3572-
fp_params,
35733572
type_map=jdata["type_map"],
35743573
pporb=pporb_path,
35753574
)

tests/auto_test/test_abacus.py

+16-2
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ def test_make_input_file_kspacing_three_value(self):
216216
kpt = f1.read().strip().split("\n")[-1].split()
217217
self.assertEqual(kpt, ["9", "5", "3", "0", "0", "0"])
218218

219-
def test_compuate(self):
219+
def test_compute(self):
220220
ret = self.ABACUS.compute(os.path.join(self.equi_path))
221221
self.assertIsNone(ret)
222222
shutil.copy(
@@ -247,7 +247,21 @@ def compare_dict(dict1, dict2):
247247
else:
248248
self.assertTrue(dict1[key] == dict2[key])
249249

250-
compare_dict(ret, ret_ref.as_dict())
250+
compare_keys = [
251+
"atom_numbs",
252+
"atom_names",
253+
"atom_types",
254+
"cells",
255+
"coords",
256+
"energies",
257+
"forces",
258+
"virials",
259+
"stress",
260+
]
261+
compare_dict(
262+
{k: ret["data"][k] for k in compare_keys},
263+
{k: ret_ref.data[k] for k in compare_keys},
264+
)
251265

252266

253267
class TestABACUSDeepKS(unittest.TestCase):

tests/data/test_gen_bulk_abacus.py

+1
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ def test(self):
7676
def testSTRU(self):
7777
jdata = self.jdata
7878
jdata["from_poscar_path"] = "./Cu.STRU"
79+
jdata["potcars"] = ["abacus.in/Cu_ONCV_PBE-1.0.upf"]
7980
make_super_cell_STRU(jdata)
8081
make_abacus_relax(jdata, {"fp_resources": {}})
8182
make_scale_ABACUS(jdata)

0 commit comments

Comments
 (0)