From bf27d69004dca19fdf8d6c00d02de3375cdbdc84 Mon Sep 17 00:00:00 2001 From: Han Lin Mai Date: Mon, 18 Sep 2023 14:42:43 +0200 Subject: [PATCH 01/12] Voronoi featuriser pymatgen --- structuretoolkit/analyse/pymatgen.py | 73 ++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 structuretoolkit/analyse/pymatgen.py diff --git a/structuretoolkit/analyse/pymatgen.py b/structuretoolkit/analyse/pymatgen.py new file mode 100644 index 000000000..b24e3ebcc --- /dev/null +++ b/structuretoolkit/analyse/pymatgen.py @@ -0,0 +1,73 @@ +from pymatgen.io.ase import AseAtomsAdaptor +from pymatgen.analysis.local_env import VoronoiNN +from pymatgen.core import Structure, Element +import numpy as np +import pandas as pd + +def get_stats(property_list, property_str): + """ + Calculate statistical properties of a list of values. + Parameters: + property_list (list): A list of numerical values for which statistics are calculated. + property_str (str): A string prefix to be used in the resulting statistical property names. + Returns: + dict: A dictionary containing statistical properties with keys in the format: + "{property_str}_{statistic}" where statistic can be "std" (standard deviation), + "mean" (mean), "min" (minimum), and "max" (maximum). + Example: + >>> values = [1, 2, 3, 4, 5] + >>> get_stats(values, "example") + {'example_std': 1.4142135623730951, + 'example_mean': 3.0, + 'example_min': 1, + 'example_max': 5} + """ + return { + f"{property_str}_std": np.std(property_list), + f"{property_str}_mean": np.mean(property_list), + f"{property_str}_min": np.min(property_list), + f"{property_str}_max": np.max(property_list) + } + +def VoronoiSiteFeaturiser(structure, site): + """ + Calculate various Voronoi-related features for a specific site in a crystal structure. + Parameters: + structure (ase.Atoms or pymatgen.Structure): The crystal structure. + site (int): The index of the site in the crystal structure. + Returns: + pandas.DataFrame: A DataFrame containing computed Voronoi features for the specified site. + Columns include VorNN_CoordNo, VorNN_tot_vol, VorNN_tot_area, as well as + statistics for volumes, vertices, areas, and distances. + Example: + >>> from pymatgen import Structure + >>> structure = Structure.from_file("example.cif") + >>> VoronoiSiteFeaturiser(structure, 0) + VorNN_CoordNo VorNN_tot_vol VorNN_tot_area volumes_std volumes_mean ... + 0 7.0 34.315831 61.556747 10.172586 34.315831 ... + """ + structure = AseAtomsAdaptor().get_structure(structure) + coord_no = VoronoiNN().get_cn(structure=structure, n=site) + site_info_dict = VoronoiNN().get_voronoi_polyhedra(structure, site) + volumes = [site_info_dict[polyhedra]["volume"] for polyhedra in site_info_dict] + vertices = [site_info_dict[polyhedra]["n_verts"] for polyhedra in site_info_dict] + distances = [site_info_dict[polyhedra]["face_dist"] for polyhedra in site_info_dict] + areas = [site_info_dict[polyhedra]["area"] for polyhedra in site_info_dict] + + total_area = np.sum(areas) + total_volume = np.sum(volumes) + + data = { + "VorNN_CoordNo": coord_no, + "VorNN_tot_vol": total_volume, + "VorNN_tot_area": total_area + } + + data_str_list = ["volumes", "vertices", "areas", "distances"] + + for i, value_list in enumerate([volumes, vertices, areas, distances]): + stats = get_stats(value_list, f"VorNN_{data_str_list[i]}") + data.update(stats) + + df = pd.DataFrame(data, index=[site]) + return df \ No newline at end of file From 780bfba3abc5640b39c2740a3dd32c9e99daee16 Mon Sep 17 00:00:00 2001 From: Han Lin Mai Date: Mon, 18 Sep 2023 14:47:46 +0200 Subject: [PATCH 02/12] Add test --- tests/test_pymatgen.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/tests/test_pymatgen.py b/tests/test_pymatgen.py index 1fc887975..68a15d871 100644 --- a/tests/test_pymatgen.py +++ b/tests/test_pymatgen.py @@ -196,3 +196,41 @@ def test_pyiron_to_pymatgen_conversion(self): ), "Failed to produce equivalent sel_dyn when both magmom + sel_dyn are present!", ) + +class TestVoronoiSiteFeaturiser(unittest.TestCase): + def setUp(self): + self.example_structure = bulk("Fe") + + def test_VoronoiSiteFeaturiser(self): + # Calculate the expected output manually + expected_output = { + "VorNN_CoordNo": 14, + "VorNN_tot_vol": 11.819951, + "VorNN_tot_area": 27.577769, + "VorNN_volumes_std": 0.304654, + "VorNN_volumes_mean": 0.844282, + "VorNN_volumes_min": 0.492498, + "VorNN_volumes_max": 1.10812, + "VorNN_vertices_std": 0.989743, + "VorNN_vertices_mean": 5.142857, + "VorNN_vertices_min": 4, + "VorNN_vertices_max": 6, + "VorNN_areas_std": 0.814261, + "VorNN_areas_mean": 1.969841, + "VorNN_areas_min": 1.029612, + "VorNN_areas_max": 2.675012, + "VorNN_distances_std": 0.095141, + "VorNN_distances_mean": 1.325141, + "VorNN_distances_min": 1.242746, + "VorNN_distances_max": 1.435 + } + + # Call the function with the example structure + df = VoronoiSiteFeaturiser(self.example_structure, 0) + + # Define the tolerance for approximate equality (up to 4 decimal places) + tolerance = 1e-4 + + # Check that the DataFrame matches the expected output with the specified tolerance + for column, expected_value in expected_output.items(): + self.assertAlmostEqual(df[column], expected_value, delta=tolerance) \ No newline at end of file From 66ebfc28683f2f03f3638b0fc7ec47d51a91861a Mon Sep 17 00:00:00 2001 From: Han Lin Mai Date: Mon, 18 Sep 2023 14:52:32 +0200 Subject: [PATCH 03/12] forgot import statement --- tests/test_pymatgen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pymatgen.py b/tests/test_pymatgen.py index 68a15d871..e89cb9386 100644 --- a/tests/test_pymatgen.py +++ b/tests/test_pymatgen.py @@ -3,7 +3,7 @@ from ase.build import bulk from ase.constraints import FixAtoms from structuretoolkit.common import pymatgen_to_ase, ase_to_pymatgen - +from structuretoolkit.analyse.pymatgen import VoronoiFeaturiser try: from pymatgen.core import Structure, Lattice From 90992b345a2fea0a42865f1dce182876d76519c2 Mon Sep 17 00:00:00 2001 From: Han Lin Mai Date: Mon, 18 Sep 2023 14:57:07 +0200 Subject: [PATCH 04/12] import statement --- tests/test_pymatgen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pymatgen.py b/tests/test_pymatgen.py index e89cb9386..2d211bfc0 100644 --- a/tests/test_pymatgen.py +++ b/tests/test_pymatgen.py @@ -3,7 +3,7 @@ from ase.build import bulk from ase.constraints import FixAtoms from structuretoolkit.common import pymatgen_to_ase, ase_to_pymatgen -from structuretoolkit.analyse.pymatgen import VoronoiFeaturiser +from structuretoolkit.analyse.pymatgen import VoronoiSiteFeaturiser try: from pymatgen.core import Structure, Lattice From b561944b70c5199a4a8b7f93b2945f06cdaa0ee0 Mon Sep 17 00:00:00 2001 From: Han Lin Mai Date: Mon, 18 Sep 2023 14:57:10 +0200 Subject: [PATCH 05/12] import statement --- .github/asdf.ipynb | 273 +++++++++++++++++++++++++ .github/developVoronoiFeaturiser.ipynb | 244 ++++++++++++++++++++++ tests/pyiron.log | 0 3 files changed, 517 insertions(+) create mode 100644 .github/asdf.ipynb create mode 100644 .github/developVoronoiFeaturiser.ipynb create mode 100644 tests/pyiron.log diff --git a/.github/asdf.ipynb b/.github/asdf.ipynb new file mode 100644 index 000000000..6e5c37310 --- /dev/null +++ b/.github/asdf.ipynb @@ -0,0 +1,273 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from dscribe.descriptors.soap import SOAP" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "def calculate_soap_descriptor_per_atom(\n", + " structure,\n", + " r_cut=None,\n", + " n_max=None,\n", + " l_max=None,\n", + " sigma=1.0,\n", + " rbf=\"gto\",\n", + " weighting=None,\n", + " average=\"off\",\n", + " compression={\"mode\": \"off\", \"species_weighting\": None},\n", + " species=None,\n", + " periodic=True,\n", + " sparse=False,\n", + " dtype=\"float64\",\n", + " centers=None,\n", + " n_jobs=1,\n", + " only_physical_cores=False,\n", + " verbose=False,\n", + "):\n", + " from dscribe.descriptors import SOAP\n", + "\n", + " # if species is None:\n", + " # species = list(set(structure.get_chemical_symbols()))\n", + " periodic_soap = SOAP(\n", + " r_cut=r_cut,\n", + " n_max=n_max,\n", + " l_max=l_max,\n", + " sigma=sigma,\n", + " rbf=rbf,\n", + " weighting=weighting,\n", + " average=average,\n", + " compression=compression,\n", + " species=species,\n", + " periodic=periodic,\n", + " sparse=sparse,\n", + " dtype=dtype,\n", + " )\n", + " return periodic_soap.create(\n", + " system=structure,\n", + " centers=centers,\n", + " n_jobs=n_jobs,\n", + " only_physical_cores=only_physical_cores,\n", + " verbose=verbose,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "from ase import build" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "struct = build.bulk(\"Fe\", cubic=True)\n", + "len(struct)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Atoms(symbols='Fe2', pbc=True, cell=[2.87, 2.87, 2.87])" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "struct" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Please provide the species as an iterable, e.g. a list.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/root/personal_python_utilities/structuretoolkit/.github/asdf.ipynb Cell 6\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m a \u001b[39m=\u001b[39m calculate_soap_descriptor_per_atom(pmg_st,\n\u001b[1;32m 2\u001b[0m r_cut\u001b[39m=\u001b[39;49m\u001b[39m5\u001b[39;49m,\n\u001b[1;32m 3\u001b[0m n_max\u001b[39m=\u001b[39;49m\u001b[39m10\u001b[39;49m,\n\u001b[1;32m 4\u001b[0m l_max\u001b[39m=\u001b[39;49m\u001b[39m10\u001b[39;49m,)\n\u001b[1;32m 5\u001b[0m \u001b[39mtype\u001b[39m(a)\n", + "\u001b[1;32m/root/personal_python_utilities/structuretoolkit/.github/asdf.ipynb Cell 6\u001b[0m line \u001b[0;36m2\n\u001b[1;32m 20\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mdscribe\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mdescriptors\u001b[39;00m \u001b[39mimport\u001b[39;00m SOAP\n\u001b[1;32m 22\u001b[0m \u001b[39m# if species is None:\u001b[39;00m\n\u001b[1;32m 23\u001b[0m \u001b[39m# species = list(set(structure.get_chemical_symbols()))\u001b[39;00m\n\u001b[0;32m---> 24\u001b[0m periodic_soap \u001b[39m=\u001b[39m SOAP(\n\u001b[1;32m 25\u001b[0m r_cut\u001b[39m=\u001b[39;49mr_cut,\n\u001b[1;32m 26\u001b[0m n_max\u001b[39m=\u001b[39;49mn_max,\n\u001b[1;32m 27\u001b[0m l_max\u001b[39m=\u001b[39;49ml_max,\n\u001b[1;32m 28\u001b[0m sigma\u001b[39m=\u001b[39;49msigma,\n\u001b[1;32m 29\u001b[0m rbf\u001b[39m=\u001b[39;49mrbf,\n\u001b[1;32m 30\u001b[0m weighting\u001b[39m=\u001b[39;49mweighting,\n\u001b[1;32m 31\u001b[0m average\u001b[39m=\u001b[39;49maverage,\n\u001b[1;32m 32\u001b[0m compression\u001b[39m=\u001b[39;49mcompression,\n\u001b[1;32m 33\u001b[0m species\u001b[39m=\u001b[39;49mspecies,\n\u001b[1;32m 34\u001b[0m periodic\u001b[39m=\u001b[39;49mperiodic,\n\u001b[1;32m 35\u001b[0m sparse\u001b[39m=\u001b[39;49msparse,\n\u001b[1;32m 36\u001b[0m dtype\u001b[39m=\u001b[39;49mdtype,\n\u001b[1;32m 37\u001b[0m )\n\u001b[1;32m 38\u001b[0m \u001b[39mreturn\u001b[39;00m periodic_soap\u001b[39m.\u001b[39mcreate(\n\u001b[1;32m 39\u001b[0m system\u001b[39m=\u001b[39mstructure,\n\u001b[1;32m 40\u001b[0m centers\u001b[39m=\u001b[39mcenters,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 43\u001b[0m verbose\u001b[39m=\u001b[39mverbose,\n\u001b[1;32m 44\u001b[0m )\n", + "File \u001b[0;32m~/miniconda3/envs/dscribe/lib/python3.11/site-packages/dscribe/descriptors/soap.py:203\u001b[0m, in \u001b[0;36mSOAP.__init__\u001b[0;34m(self, r_cut, n_max, l_max, sigma, rbf, weighting, average, compression, species, periodic, sparse, dtype)\u001b[0m\n\u001b[1;32m 200\u001b[0m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(periodic\u001b[39m=\u001b[39mperiodic, sparse\u001b[39m=\u001b[39msparse, dtype\u001b[39m=\u001b[39mdtype)\n\u001b[1;32m 202\u001b[0m \u001b[39m# Setup the involved chemical species\u001b[39;00m\n\u001b[0;32m--> 203\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mspecies \u001b[39m=\u001b[39m species\n\u001b[1;32m 205\u001b[0m \u001b[39m# Test that general settings are valid\u001b[39;00m\n\u001b[1;32m 206\u001b[0m \u001b[39mif\u001b[39;00m sigma \u001b[39m<\u001b[39m\u001b[39m=\u001b[39m \u001b[39m0\u001b[39m:\n", + "File \u001b[0;32m~/miniconda3/envs/dscribe/lib/python3.11/site-packages/dscribe/descriptors/soap.py:837\u001b[0m, in \u001b[0;36mSOAP.species\u001b[0;34m(self, value)\u001b[0m\n\u001b[1;32m 829\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Used to check the validity of given atomic numbers and to initialize\u001b[39;00m\n\u001b[1;32m 830\u001b[0m \u001b[39mthe C-memory layout for them.\u001b[39;00m\n\u001b[1;32m 831\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 834\u001b[0m \u001b[39m numbers or list of chemical symbols.\u001b[39;00m\n\u001b[1;32m 835\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 836\u001b[0m \u001b[39m# The species are stored as atomic numbers for internal use.\u001b[39;00m\n\u001b[0;32m--> 837\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_set_species(value)\n\u001b[1;32m 839\u001b[0m \u001b[39m# Setup mappings between atom indices and types\u001b[39;00m\n\u001b[1;32m 840\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39matomic_number_to_index \u001b[39m=\u001b[39m {}\n", + "File \u001b[0;32m~/miniconda3/envs/dscribe/lib/python3.11/site-packages/dscribe/descriptors/descriptor.py:122\u001b[0m, in \u001b[0;36mDescriptor._set_species\u001b[0;34m(self, species)\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Used to setup the species information for this descriptor. This\u001b[39;00m\n\u001b[1;32m 114\u001b[0m \u001b[39minformation includes an ordered list of unique atomic numbers, a set\u001b[39;00m\n\u001b[1;32m 115\u001b[0m \u001b[39mof atomic numbers and the original variable contents.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[39m numbers or list of chemical symbols.\u001b[39;00m\n\u001b[1;32m 120\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 121\u001b[0m \u001b[39m# The species are stored as atomic numbers for internal use.\u001b[39;00m\n\u001b[0;32m--> 122\u001b[0m atomic_numbers \u001b[39m=\u001b[39m get_atomic_numbers(species)\n\u001b[1;32m 123\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_atomic_numbers \u001b[39m=\u001b[39m atomic_numbers\n\u001b[1;32m 124\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_atomic_number_set \u001b[39m=\u001b[39m \u001b[39mset\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_atomic_numbers)\n", + "File \u001b[0;32m~/miniconda3/envs/dscribe/lib/python3.11/site-packages/dscribe/utils/species.py:59\u001b[0m, in \u001b[0;36mget_atomic_numbers\u001b[0;34m(species)\u001b[0m\n\u001b[1;32m 57\u001b[0m is_string \u001b[39m=\u001b[39m \u001b[39misinstance\u001b[39m(species, (\u001b[39mstr\u001b[39m, np\u001b[39m.\u001b[39mstr_))\n\u001b[1;32m 58\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m is_iterable \u001b[39mor\u001b[39;00m is_string:\n\u001b[0;32m---> 59\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mPlease provide the species as an iterable, e.g. a list.\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 61\u001b[0m \u001b[39m# Determine if the given species are atomic numbers or chemical symbols\u001b[39;00m\n\u001b[1;32m 62\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mall\u001b[39m(\u001b[39misinstance\u001b[39m(x, (\u001b[39mint\u001b[39m, np\u001b[39m.\u001b[39minteger)) \u001b[39mfor\u001b[39;00m x \u001b[39min\u001b[39;00m species):\n", + "\u001b[0;31mValueError\u001b[0m: Please provide the species as an iterable, e.g. a list." + ] + } + ], + "source": [ + "a = calculate_soap_descriptor_per_atom(pmg_st,\n", + " r_cut=5,\n", + " n_max=10,\n", + " l_max=10,)\n", + "type(a)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(a)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "605" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(a[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "from pymatgen.io.ase import AseAtomsAdaptor\n", + "\n", + "pmg_st = AseAtomsAdaptor().get_structure(struct)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "'Structure' object has no attribute 'get_chemical_symbols'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/root/personal_python_utilities/structuretoolkit/.github/asdf.ipynb Cell 9\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m a \u001b[39m=\u001b[39m calculate_soap_descriptor_per_atom(pmg_st,\n\u001b[1;32m 2\u001b[0m r_cut\u001b[39m=\u001b[39;49m\u001b[39m5\u001b[39;49m,\n\u001b[1;32m 3\u001b[0m n_max\u001b[39m=\u001b[39;49m\u001b[39m10\u001b[39;49m,\n\u001b[1;32m 4\u001b[0m l_max\u001b[39m=\u001b[39;49m\u001b[39m10\u001b[39;49m,)\n\u001b[1;32m 5\u001b[0m \u001b[39mtype\u001b[39m(a)\n", + "\u001b[1;32m/root/personal_python_utilities/structuretoolkit/.github/asdf.ipynb Cell 9\u001b[0m line \u001b[0;36m2\n\u001b[1;32m 20\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mdscribe\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mdescriptors\u001b[39;00m \u001b[39mimport\u001b[39;00m SOAP\n\u001b[1;32m 22\u001b[0m \u001b[39mif\u001b[39;00m species \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m---> 23\u001b[0m species \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(\u001b[39mset\u001b[39m(structure\u001b[39m.\u001b[39;49mget_chemical_symbols()))\n\u001b[1;32m 24\u001b[0m periodic_soap \u001b[39m=\u001b[39m SOAP(\n\u001b[1;32m 25\u001b[0m r_cut\u001b[39m=\u001b[39mr_cut,\n\u001b[1;32m 26\u001b[0m n_max\u001b[39m=\u001b[39mn_max,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 36\u001b[0m dtype\u001b[39m=\u001b[39mdtype,\n\u001b[1;32m 37\u001b[0m )\n\u001b[1;32m 38\u001b[0m \u001b[39mreturn\u001b[39;00m periodic_soap\u001b[39m.\u001b[39mcreate(\n\u001b[1;32m 39\u001b[0m system\u001b[39m=\u001b[39mstructure,\n\u001b[1;32m 40\u001b[0m centers\u001b[39m=\u001b[39mcenters,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 43\u001b[0m verbose\u001b[39m=\u001b[39mverbose,\n\u001b[1;32m 44\u001b[0m )\n", + "\u001b[0;31mAttributeError\u001b[0m: 'Structure' object has no attribute 'get_chemical_symbols'" + ] + } + ], + "source": [ + "a = calculate_soap_descriptor_per_atom(pmg_st,\n", + " r_cut=5,\n", + " n_max=10,\n", + " l_max=10,)\n", + "type(a)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'pandas'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", + "\u001b[1;32m/root/personal_python_utilities/structuretoolkit/.github/asdf.ipynb Cell 6\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mpandas\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mpd\u001b[39;00m\n\u001b[1;32m 2\u001b[0m b \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mDataFrame(a)\n", + "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'pandas'" + ] + } + ], + "source": [ + "import pandas as pd\n", + "b = pd.DataFrame(a)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dscribe", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/.github/developVoronoiFeaturiser.ipynb b/.github/developVoronoiFeaturiser.ipynb new file mode 100644 index 000000000..bd4acf048 --- /dev/null +++ b/.github/developVoronoiFeaturiser.ipynb @@ -0,0 +1,244 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from pymatgen.io.ase import AseAtomsAdaptor\n", + "from pymatgen.analysis.local_env import VoronoiNN\n", + "from pymatgen.core import Structure\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from ase.build import bulk" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def get_stats(property_list, property_str):\n", + " return [f\"{property_str}_std\",f\"{property_str}_mean\",f\"{property_str}_min\",f\"{property_str}_max\"],\\\n", + " [np.std(property_list), np.mean(property_list), np.min(property_list), np.max(property_list)]\n", + " \n", + "def VoronoiSiteFeaturiser(structure, site):\n", + " structure = AseAtomsAdaptor().get_structure(structure)\n", + " coord_no = VoronoiNN().get_cn(structure = structure, n = site)\n", + " site_info_dict = VoronoiNN().get_voronoi_polyhedra(structure, site)\n", + " volumes = [site_info_dict[polyhedra][\"volume\"] for polyhedra in list(site_info_dict.keys())]\n", + " vertices = [site_info_dict[polyhedra][\"n_verts\"] for polyhedra in list(site_info_dict.keys())]\n", + " distances = [site_info_dict[polyhedra][\"face_dist\"] for polyhedra in list(site_info_dict.keys())]\n", + " areas = [site_info_dict[polyhedra][\"area\"] for polyhedra in list(site_info_dict.keys())]\n", + " \n", + " total_area = np.sum(areas)\n", + " total_volume = np.sum(volumes)\n", + " \n", + " df_str_list = [\"VorNN_CoordNo\", \"VorNN_tot_vol\", \"VorNN_tot_area\"]\n", + " df_prop_list = [coord_no, total_volume, total_area]\n", + " \n", + " data_str_list = [\"volumes\", \"vertices\", \"areas\", \"distances\"]\n", + "\n", + " for i, value_list in enumerate([volumes, vertices, areas, distances]):\n", + " property_str_list, property_stats_list = get_stats(value_list, f\"VorNN_{data_str_list[i]}\")\n", + " df_str_list += property_str_list\n", + " df_prop_list += property_stats_list\n", + " \n", + " return df_str_list, df_prop_list" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from pymatgen.analysis.local_env import VoronoiNN\n", + "from pymatgen.core.structure import Structure\n", + "from pymatgen.io.ase import AseAtomsAdaptor\n", + "\n", + "def get_stats(property_list, property_str):\n", + " return {\n", + " f\"{property_str}_std\": np.std(property_list),\n", + " f\"{property_str}_mean\": np.mean(property_list),\n", + " f\"{property_str}_min\": np.min(property_list),\n", + " f\"{property_str}_max\": np.max(property_list)\n", + " }\n", + "\n", + "def VoronoiSiteFeaturiser(structure, site):\n", + " structure = AseAtomsAdaptor().get_structure(structure)\n", + " coord_no = VoronoiNN().get_cn(structure=structure, n=site)\n", + " site_info_dict = VoronoiNN().get_voronoi_polyhedra(structure, site)\n", + " volumes = [site_info_dict[polyhedra][\"volume\"] for polyhedra in site_info_dict]\n", + " vertices = [site_info_dict[polyhedra][\"n_verts\"] for polyhedra in site_info_dict]\n", + " distances = [site_info_dict[polyhedra][\"face_dist\"] for polyhedra in site_info_dict]\n", + " areas = [site_info_dict[polyhedra][\"area\"] for polyhedra in site_info_dict]\n", + "\n", + " total_area = np.sum(areas)\n", + " total_volume = np.sum(volumes)\n", + "\n", + " data = {\n", + " \"VorNN_CoordNo\": coord_no,\n", + " \"VorNN_tot_vol\": total_volume,\n", + " \"VorNN_tot_area\": total_area\n", + " }\n", + "\n", + " data_str_list = [\"volumes\", \"vertices\", \"areas\", \"distances\"]\n", + "\n", + " for i, value_list in enumerate([volumes, vertices, areas, distances]):\n", + " stats = get_stats(value_list, f\"VorNN_{data_str_list[i]}\")\n", + " data.update(stats)\n", + "\n", + " df = pd.DataFrame(data, index=[site])\n", + " return df\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "struct = bulk(\"Fe\", cubic=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
VorNN_CoordNoVorNN_tot_volVorNN_tot_areaVorNN_volumes_stdVorNN_volumes_meanVorNN_volumes_minVorNN_volumes_maxVorNN_vertices_stdVorNN_vertices_meanVorNN_vertices_minVorNN_vertices_maxVorNN_areas_stdVorNN_areas_meanVorNN_areas_minVorNN_areas_maxVorNN_distances_stdVorNN_distances_meanVorNN_distances_minVorNN_distances_max
11411.81995127.5777690.3046540.8442820.4924981.108120.9897435.142857460.8142611.9698411.0296122.6750120.0951411.3251411.2427461.435
\n", + "
" + ], + "text/plain": [ + " VorNN_CoordNo VorNN_tot_vol VorNN_tot_area VorNN_volumes_std \n", + "1 14 11.819951 27.577769 0.304654 \\\n", + "\n", + " VorNN_volumes_mean VorNN_volumes_min VorNN_volumes_max \n", + "1 0.844282 0.492498 1.10812 \\\n", + "\n", + " VorNN_vertices_std VorNN_vertices_mean VorNN_vertices_min \n", + "1 0.989743 5.142857 4 \\\n", + "\n", + " VorNN_vertices_max VorNN_areas_std VorNN_areas_mean VorNN_areas_min \n", + "1 6 0.814261 1.969841 1.029612 \\\n", + "\n", + " VorNN_areas_max VorNN_distances_std VorNN_distances_mean \n", + "1 2.675012 0.095141 1.325141 \\\n", + "\n", + " VorNN_distances_min VorNN_distances_max \n", + "1 1.242746 1.435 " + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "struct = bulk(\"Fe\", cubic=True)\n", + "df = VoronoiSiteFeaturiser(struct, 1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "pyiron", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.10" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/pyiron.log b/tests/pyiron.log new file mode 100644 index 000000000..e69de29bb From 9533fcdf3a9ad9c2e17670b97e22a509dc20479b Mon Sep 17 00:00:00 2001 From: Han Lin Mai Date: Mon, 18 Sep 2023 14:58:18 +0200 Subject: [PATCH 06/12] remove useless files --- .github/asdf.ipynb | 273 ------------------------- .github/developVoronoiFeaturiser.ipynb | 244 ---------------------- tests/pyiron.log | 0 3 files changed, 517 deletions(-) delete mode 100644 .github/asdf.ipynb delete mode 100644 .github/developVoronoiFeaturiser.ipynb delete mode 100644 tests/pyiron.log diff --git a/.github/asdf.ipynb b/.github/asdf.ipynb deleted file mode 100644 index 6e5c37310..000000000 --- a/.github/asdf.ipynb +++ /dev/null @@ -1,273 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from dscribe.descriptors.soap import SOAP" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [], - "source": [ - "def calculate_soap_descriptor_per_atom(\n", - " structure,\n", - " r_cut=None,\n", - " n_max=None,\n", - " l_max=None,\n", - " sigma=1.0,\n", - " rbf=\"gto\",\n", - " weighting=None,\n", - " average=\"off\",\n", - " compression={\"mode\": \"off\", \"species_weighting\": None},\n", - " species=None,\n", - " periodic=True,\n", - " sparse=False,\n", - " dtype=\"float64\",\n", - " centers=None,\n", - " n_jobs=1,\n", - " only_physical_cores=False,\n", - " verbose=False,\n", - "):\n", - " from dscribe.descriptors import SOAP\n", - "\n", - " # if species is None:\n", - " # species = list(set(structure.get_chemical_symbols()))\n", - " periodic_soap = SOAP(\n", - " r_cut=r_cut,\n", - " n_max=n_max,\n", - " l_max=l_max,\n", - " sigma=sigma,\n", - " rbf=rbf,\n", - " weighting=weighting,\n", - " average=average,\n", - " compression=compression,\n", - " species=species,\n", - " periodic=periodic,\n", - " sparse=sparse,\n", - " dtype=dtype,\n", - " )\n", - " return periodic_soap.create(\n", - " system=structure,\n", - " centers=centers,\n", - " n_jobs=n_jobs,\n", - " only_physical_cores=only_physical_cores,\n", - " verbose=verbose,\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [], - "source": [ - "from ase import build" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "struct = build.bulk(\"Fe\", cubic=True)\n", - "len(struct)" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Atoms(symbols='Fe2', pbc=True, cell=[2.87, 2.87, 2.87])" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "struct" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [ - { - "ename": "ValueError", - "evalue": "Please provide the species as an iterable, e.g. a list.", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m/root/personal_python_utilities/structuretoolkit/.github/asdf.ipynb Cell 6\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m a \u001b[39m=\u001b[39m calculate_soap_descriptor_per_atom(pmg_st,\n\u001b[1;32m 2\u001b[0m r_cut\u001b[39m=\u001b[39;49m\u001b[39m5\u001b[39;49m,\n\u001b[1;32m 3\u001b[0m n_max\u001b[39m=\u001b[39;49m\u001b[39m10\u001b[39;49m,\n\u001b[1;32m 4\u001b[0m l_max\u001b[39m=\u001b[39;49m\u001b[39m10\u001b[39;49m,)\n\u001b[1;32m 5\u001b[0m \u001b[39mtype\u001b[39m(a)\n", - "\u001b[1;32m/root/personal_python_utilities/structuretoolkit/.github/asdf.ipynb Cell 6\u001b[0m line \u001b[0;36m2\n\u001b[1;32m 20\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mdscribe\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mdescriptors\u001b[39;00m \u001b[39mimport\u001b[39;00m SOAP\n\u001b[1;32m 22\u001b[0m \u001b[39m# if species is None:\u001b[39;00m\n\u001b[1;32m 23\u001b[0m \u001b[39m# species = list(set(structure.get_chemical_symbols()))\u001b[39;00m\n\u001b[0;32m---> 24\u001b[0m periodic_soap \u001b[39m=\u001b[39m SOAP(\n\u001b[1;32m 25\u001b[0m r_cut\u001b[39m=\u001b[39;49mr_cut,\n\u001b[1;32m 26\u001b[0m n_max\u001b[39m=\u001b[39;49mn_max,\n\u001b[1;32m 27\u001b[0m l_max\u001b[39m=\u001b[39;49ml_max,\n\u001b[1;32m 28\u001b[0m sigma\u001b[39m=\u001b[39;49msigma,\n\u001b[1;32m 29\u001b[0m rbf\u001b[39m=\u001b[39;49mrbf,\n\u001b[1;32m 30\u001b[0m weighting\u001b[39m=\u001b[39;49mweighting,\n\u001b[1;32m 31\u001b[0m average\u001b[39m=\u001b[39;49maverage,\n\u001b[1;32m 32\u001b[0m compression\u001b[39m=\u001b[39;49mcompression,\n\u001b[1;32m 33\u001b[0m species\u001b[39m=\u001b[39;49mspecies,\n\u001b[1;32m 34\u001b[0m periodic\u001b[39m=\u001b[39;49mperiodic,\n\u001b[1;32m 35\u001b[0m sparse\u001b[39m=\u001b[39;49msparse,\n\u001b[1;32m 36\u001b[0m dtype\u001b[39m=\u001b[39;49mdtype,\n\u001b[1;32m 37\u001b[0m )\n\u001b[1;32m 38\u001b[0m \u001b[39mreturn\u001b[39;00m periodic_soap\u001b[39m.\u001b[39mcreate(\n\u001b[1;32m 39\u001b[0m system\u001b[39m=\u001b[39mstructure,\n\u001b[1;32m 40\u001b[0m centers\u001b[39m=\u001b[39mcenters,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 43\u001b[0m verbose\u001b[39m=\u001b[39mverbose,\n\u001b[1;32m 44\u001b[0m )\n", - "File \u001b[0;32m~/miniconda3/envs/dscribe/lib/python3.11/site-packages/dscribe/descriptors/soap.py:203\u001b[0m, in \u001b[0;36mSOAP.__init__\u001b[0;34m(self, r_cut, n_max, l_max, sigma, rbf, weighting, average, compression, species, periodic, sparse, dtype)\u001b[0m\n\u001b[1;32m 200\u001b[0m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__init__\u001b[39m(periodic\u001b[39m=\u001b[39mperiodic, sparse\u001b[39m=\u001b[39msparse, dtype\u001b[39m=\u001b[39mdtype)\n\u001b[1;32m 202\u001b[0m \u001b[39m# Setup the involved chemical species\u001b[39;00m\n\u001b[0;32m--> 203\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49mspecies \u001b[39m=\u001b[39m species\n\u001b[1;32m 205\u001b[0m \u001b[39m# Test that general settings are valid\u001b[39;00m\n\u001b[1;32m 206\u001b[0m \u001b[39mif\u001b[39;00m sigma \u001b[39m<\u001b[39m\u001b[39m=\u001b[39m \u001b[39m0\u001b[39m:\n", - "File \u001b[0;32m~/miniconda3/envs/dscribe/lib/python3.11/site-packages/dscribe/descriptors/soap.py:837\u001b[0m, in \u001b[0;36mSOAP.species\u001b[0;34m(self, value)\u001b[0m\n\u001b[1;32m 829\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Used to check the validity of given atomic numbers and to initialize\u001b[39;00m\n\u001b[1;32m 830\u001b[0m \u001b[39mthe C-memory layout for them.\u001b[39;00m\n\u001b[1;32m 831\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 834\u001b[0m \u001b[39m numbers or list of chemical symbols.\u001b[39;00m\n\u001b[1;32m 835\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 836\u001b[0m \u001b[39m# The species are stored as atomic numbers for internal use.\u001b[39;00m\n\u001b[0;32m--> 837\u001b[0m \u001b[39mself\u001b[39;49m\u001b[39m.\u001b[39;49m_set_species(value)\n\u001b[1;32m 839\u001b[0m \u001b[39m# Setup mappings between atom indices and types\u001b[39;00m\n\u001b[1;32m 840\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39matomic_number_to_index \u001b[39m=\u001b[39m {}\n", - "File \u001b[0;32m~/miniconda3/envs/dscribe/lib/python3.11/site-packages/dscribe/descriptors/descriptor.py:122\u001b[0m, in \u001b[0;36mDescriptor._set_species\u001b[0;34m(self, species)\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"Used to setup the species information for this descriptor. This\u001b[39;00m\n\u001b[1;32m 114\u001b[0m \u001b[39minformation includes an ordered list of unique atomic numbers, a set\u001b[39;00m\n\u001b[1;32m 115\u001b[0m \u001b[39mof atomic numbers and the original variable contents.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 119\u001b[0m \u001b[39m numbers or list of chemical symbols.\u001b[39;00m\n\u001b[1;32m 120\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m 121\u001b[0m \u001b[39m# The species are stored as atomic numbers for internal use.\u001b[39;00m\n\u001b[0;32m--> 122\u001b[0m atomic_numbers \u001b[39m=\u001b[39m get_atomic_numbers(species)\n\u001b[1;32m 123\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_atomic_numbers \u001b[39m=\u001b[39m atomic_numbers\n\u001b[1;32m 124\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_atomic_number_set \u001b[39m=\u001b[39m \u001b[39mset\u001b[39m(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_atomic_numbers)\n", - "File \u001b[0;32m~/miniconda3/envs/dscribe/lib/python3.11/site-packages/dscribe/utils/species.py:59\u001b[0m, in \u001b[0;36mget_atomic_numbers\u001b[0;34m(species)\u001b[0m\n\u001b[1;32m 57\u001b[0m is_string \u001b[39m=\u001b[39m \u001b[39misinstance\u001b[39m(species, (\u001b[39mstr\u001b[39m, np\u001b[39m.\u001b[39mstr_))\n\u001b[1;32m 58\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mnot\u001b[39;00m is_iterable \u001b[39mor\u001b[39;00m is_string:\n\u001b[0;32m---> 59\u001b[0m \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\u001b[39m\"\u001b[39m\u001b[39mPlease provide the species as an iterable, e.g. a list.\u001b[39m\u001b[39m\"\u001b[39m)\n\u001b[1;32m 61\u001b[0m \u001b[39m# Determine if the given species are atomic numbers or chemical symbols\u001b[39;00m\n\u001b[1;32m 62\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mall\u001b[39m(\u001b[39misinstance\u001b[39m(x, (\u001b[39mint\u001b[39m, np\u001b[39m.\u001b[39minteger)) \u001b[39mfor\u001b[39;00m x \u001b[39min\u001b[39;00m species):\n", - "\u001b[0;31mValueError\u001b[0m: Please provide the species as an iterable, e.g. a list." - ] - } - ], - "source": [ - "a = calculate_soap_descriptor_per_atom(pmg_st,\n", - " r_cut=5,\n", - " n_max=10,\n", - " l_max=10,)\n", - "type(a)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(a)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "605" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(a[0])" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [], - "source": [ - "from pymatgen.io.ase import AseAtomsAdaptor\n", - "\n", - "pmg_st = AseAtomsAdaptor().get_structure(struct)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "'Structure' object has no attribute 'get_chemical_symbols'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m/root/personal_python_utilities/structuretoolkit/.github/asdf.ipynb Cell 9\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m a \u001b[39m=\u001b[39m calculate_soap_descriptor_per_atom(pmg_st,\n\u001b[1;32m 2\u001b[0m r_cut\u001b[39m=\u001b[39;49m\u001b[39m5\u001b[39;49m,\n\u001b[1;32m 3\u001b[0m n_max\u001b[39m=\u001b[39;49m\u001b[39m10\u001b[39;49m,\n\u001b[1;32m 4\u001b[0m l_max\u001b[39m=\u001b[39;49m\u001b[39m10\u001b[39;49m,)\n\u001b[1;32m 5\u001b[0m \u001b[39mtype\u001b[39m(a)\n", - "\u001b[1;32m/root/personal_python_utilities/structuretoolkit/.github/asdf.ipynb Cell 9\u001b[0m line \u001b[0;36m2\n\u001b[1;32m 20\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mdscribe\u001b[39;00m\u001b[39m.\u001b[39;00m\u001b[39mdescriptors\u001b[39;00m \u001b[39mimport\u001b[39;00m SOAP\n\u001b[1;32m 22\u001b[0m \u001b[39mif\u001b[39;00m species \u001b[39mis\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m---> 23\u001b[0m species \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(\u001b[39mset\u001b[39m(structure\u001b[39m.\u001b[39;49mget_chemical_symbols()))\n\u001b[1;32m 24\u001b[0m periodic_soap \u001b[39m=\u001b[39m SOAP(\n\u001b[1;32m 25\u001b[0m r_cut\u001b[39m=\u001b[39mr_cut,\n\u001b[1;32m 26\u001b[0m n_max\u001b[39m=\u001b[39mn_max,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 36\u001b[0m dtype\u001b[39m=\u001b[39mdtype,\n\u001b[1;32m 37\u001b[0m )\n\u001b[1;32m 38\u001b[0m \u001b[39mreturn\u001b[39;00m periodic_soap\u001b[39m.\u001b[39mcreate(\n\u001b[1;32m 39\u001b[0m system\u001b[39m=\u001b[39mstructure,\n\u001b[1;32m 40\u001b[0m centers\u001b[39m=\u001b[39mcenters,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 43\u001b[0m verbose\u001b[39m=\u001b[39mverbose,\n\u001b[1;32m 44\u001b[0m )\n", - "\u001b[0;31mAttributeError\u001b[0m: 'Structure' object has no attribute 'get_chemical_symbols'" - ] - } - ], - "source": [ - "a = calculate_soap_descriptor_per_atom(pmg_st,\n", - " r_cut=5,\n", - " n_max=10,\n", - " l_max=10,)\n", - "type(a)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "ename": "ModuleNotFoundError", - "evalue": "No module named 'pandas'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[1;32m/root/personal_python_utilities/structuretoolkit/.github/asdf.ipynb Cell 6\u001b[0m line \u001b[0;36m1\n\u001b[0;32m----> 1\u001b[0m \u001b[39mimport\u001b[39;00m \u001b[39mpandas\u001b[39;00m \u001b[39mas\u001b[39;00m \u001b[39mpd\u001b[39;00m\n\u001b[1;32m 2\u001b[0m b \u001b[39m=\u001b[39m pd\u001b[39m.\u001b[39mDataFrame(a)\n", - "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'pandas'" - ] - } - ], - "source": [ - "import pandas as pd\n", - "b = pd.DataFrame(a)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "dscribe", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.5" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/.github/developVoronoiFeaturiser.ipynb b/.github/developVoronoiFeaturiser.ipynb deleted file mode 100644 index bd4acf048..000000000 --- a/.github/developVoronoiFeaturiser.ipynb +++ /dev/null @@ -1,244 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from pymatgen.io.ase import AseAtomsAdaptor\n", - "from pymatgen.analysis.local_env import VoronoiNN\n", - "from pymatgen.core import Structure\n", - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "from ase.build import bulk" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "def get_stats(property_list, property_str):\n", - " return [f\"{property_str}_std\",f\"{property_str}_mean\",f\"{property_str}_min\",f\"{property_str}_max\"],\\\n", - " [np.std(property_list), np.mean(property_list), np.min(property_list), np.max(property_list)]\n", - " \n", - "def VoronoiSiteFeaturiser(structure, site):\n", - " structure = AseAtomsAdaptor().get_structure(structure)\n", - " coord_no = VoronoiNN().get_cn(structure = structure, n = site)\n", - " site_info_dict = VoronoiNN().get_voronoi_polyhedra(structure, site)\n", - " volumes = [site_info_dict[polyhedra][\"volume\"] for polyhedra in list(site_info_dict.keys())]\n", - " vertices = [site_info_dict[polyhedra][\"n_verts\"] for polyhedra in list(site_info_dict.keys())]\n", - " distances = [site_info_dict[polyhedra][\"face_dist\"] for polyhedra in list(site_info_dict.keys())]\n", - " areas = [site_info_dict[polyhedra][\"area\"] for polyhedra in list(site_info_dict.keys())]\n", - " \n", - " total_area = np.sum(areas)\n", - " total_volume = np.sum(volumes)\n", - " \n", - " df_str_list = [\"VorNN_CoordNo\", \"VorNN_tot_vol\", \"VorNN_tot_area\"]\n", - " df_prop_list = [coord_no, total_volume, total_area]\n", - " \n", - " data_str_list = [\"volumes\", \"vertices\", \"areas\", \"distances\"]\n", - "\n", - " for i, value_list in enumerate([volumes, vertices, areas, distances]):\n", - " property_str_list, property_stats_list = get_stats(value_list, f\"VorNN_{data_str_list[i]}\")\n", - " df_str_list += property_str_list\n", - " df_prop_list += property_stats_list\n", - " \n", - " return df_str_list, df_prop_list" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np\n", - "from pymatgen.analysis.local_env import VoronoiNN\n", - "from pymatgen.core.structure import Structure\n", - "from pymatgen.io.ase import AseAtomsAdaptor\n", - "\n", - "def get_stats(property_list, property_str):\n", - " return {\n", - " f\"{property_str}_std\": np.std(property_list),\n", - " f\"{property_str}_mean\": np.mean(property_list),\n", - " f\"{property_str}_min\": np.min(property_list),\n", - " f\"{property_str}_max\": np.max(property_list)\n", - " }\n", - "\n", - "def VoronoiSiteFeaturiser(structure, site):\n", - " structure = AseAtomsAdaptor().get_structure(structure)\n", - " coord_no = VoronoiNN().get_cn(structure=structure, n=site)\n", - " site_info_dict = VoronoiNN().get_voronoi_polyhedra(structure, site)\n", - " volumes = [site_info_dict[polyhedra][\"volume\"] for polyhedra in site_info_dict]\n", - " vertices = [site_info_dict[polyhedra][\"n_verts\"] for polyhedra in site_info_dict]\n", - " distances = [site_info_dict[polyhedra][\"face_dist\"] for polyhedra in site_info_dict]\n", - " areas = [site_info_dict[polyhedra][\"area\"] for polyhedra in site_info_dict]\n", - "\n", - " total_area = np.sum(areas)\n", - " total_volume = np.sum(volumes)\n", - "\n", - " data = {\n", - " \"VorNN_CoordNo\": coord_no,\n", - " \"VorNN_tot_vol\": total_volume,\n", - " \"VorNN_tot_area\": total_area\n", - " }\n", - "\n", - " data_str_list = [\"volumes\", \"vertices\", \"areas\", \"distances\"]\n", - "\n", - " for i, value_list in enumerate([volumes, vertices, areas, distances]):\n", - " stats = get_stats(value_list, f\"VorNN_{data_str_list[i]}\")\n", - " data.update(stats)\n", - "\n", - " df = pd.DataFrame(data, index=[site])\n", - " return df\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "struct = bulk(\"Fe\", cubic=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
VorNN_CoordNoVorNN_tot_volVorNN_tot_areaVorNN_volumes_stdVorNN_volumes_meanVorNN_volumes_minVorNN_volumes_maxVorNN_vertices_stdVorNN_vertices_meanVorNN_vertices_minVorNN_vertices_maxVorNN_areas_stdVorNN_areas_meanVorNN_areas_minVorNN_areas_maxVorNN_distances_stdVorNN_distances_meanVorNN_distances_minVorNN_distances_max
11411.81995127.5777690.3046540.8442820.4924981.108120.9897435.142857460.8142611.9698411.0296122.6750120.0951411.3251411.2427461.435
\n", - "
" - ], - "text/plain": [ - " VorNN_CoordNo VorNN_tot_vol VorNN_tot_area VorNN_volumes_std \n", - "1 14 11.819951 27.577769 0.304654 \\\n", - "\n", - " VorNN_volumes_mean VorNN_volumes_min VorNN_volumes_max \n", - "1 0.844282 0.492498 1.10812 \\\n", - "\n", - " VorNN_vertices_std VorNN_vertices_mean VorNN_vertices_min \n", - "1 0.989743 5.142857 4 \\\n", - "\n", - " VorNN_vertices_max VorNN_areas_std VorNN_areas_mean VorNN_areas_min \n", - "1 6 0.814261 1.969841 1.029612 \\\n", - "\n", - " VorNN_areas_max VorNN_distances_std VorNN_distances_mean \n", - "1 2.675012 0.095141 1.325141 \\\n", - "\n", - " VorNN_distances_min VorNN_distances_max \n", - "1 1.242746 1.435 " - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "struct = bulk(\"Fe\", cubic=True)\n", - "df = VoronoiSiteFeaturiser(struct, 1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "pyiron", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.10" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/tests/pyiron.log b/tests/pyiron.log deleted file mode 100644 index e69de29bb..000000000 From e98fb76879a38ba47bd6bbbebe9e4ab68923643c Mon Sep 17 00:00:00 2001 From: Han Lin Mai Date: Mon, 18 Sep 2023 19:00:57 +0200 Subject: [PATCH 07/12] add test that asserts approx on pd series --- tests/test_pymatgen.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/test_pymatgen.py b/tests/test_pymatgen.py index 2d211bfc0..8d7d0d266 100644 --- a/tests/test_pymatgen.py +++ b/tests/test_pymatgen.py @@ -201,6 +201,11 @@ class TestVoronoiSiteFeaturiser(unittest.TestCase): def setUp(self): self.example_structure = bulk("Fe") + def assertAlmostEqualSeries(self, series, expected_series, decimal=4): + for index, (actual, expected) in enumerate(zip(series, expected_series)): + self.assertAlmostEqual(actual, expected, places=decimal, + msg=f"Failed at index {index}: {actual} != {expected}") + def test_VoronoiSiteFeaturiser(self): # Calculate the expected output manually expected_output = { @@ -228,9 +233,6 @@ def test_VoronoiSiteFeaturiser(self): # Call the function with the example structure df = VoronoiSiteFeaturiser(self.example_structure, 0) - # Define the tolerance for approximate equality (up to 4 decimal places) - tolerance = 1e-4 - # Check that the DataFrame matches the expected output with the specified tolerance for column, expected_value in expected_output.items(): - self.assertAlmostEqual(df[column], expected_value, delta=tolerance) \ No newline at end of file + self.assertAlmostEqualSeries(df[column], expected_value, decimal=4) From 18c9f4c59402941c9bdf9b67208f080d193d16ef Mon Sep 17 00:00:00 2001 From: Han Lin Mai Date: Mon, 18 Sep 2023 19:05:27 +0200 Subject: [PATCH 08/12] add skip condition if pmg not installed --- tests/test_pymatgen.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_pymatgen.py b/tests/test_pymatgen.py index 8d7d0d266..e3036097a 100644 --- a/tests/test_pymatgen.py +++ b/tests/test_pymatgen.py @@ -196,7 +196,10 @@ def test_pyiron_to_pymatgen_conversion(self): ), "Failed to produce equivalent sel_dyn when both magmom + sel_dyn are present!", ) - + +@unittest.skipIf( + skip_pymatgen_test, "pymatgen is not installed, so the pymatgen tests are skipped." +) class TestVoronoiSiteFeaturiser(unittest.TestCase): def setUp(self): self.example_structure = bulk("Fe") From a93e457a4af1ce99ac89e069f7df73372c8c391c Mon Sep 17 00:00:00 2001 From: Han Lin Mai Date: Mon, 18 Sep 2023 20:33:14 +0200 Subject: [PATCH 09/12] lets see if this works... --- tests/test_pymatgen.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/tests/test_pymatgen.py b/tests/test_pymatgen.py index e3036097a..6ab6076a8 100644 --- a/tests/test_pymatgen.py +++ b/tests/test_pymatgen.py @@ -3,13 +3,12 @@ from ase.build import bulk from ase.constraints import FixAtoms from structuretoolkit.common import pymatgen_to_ase, ase_to_pymatgen -from structuretoolkit.analyse.pymatgen import VoronoiSiteFeaturiser try: from pymatgen.core import Structure, Lattice - + from structuretoolkit.analyse.pymatgen import VoronoiSiteFeaturiser skip_pymatgen_test = False -except ImportError: +except ImportError, ModuleNotFoundError: skip_pymatgen_test = True @@ -204,10 +203,23 @@ class TestVoronoiSiteFeaturiser(unittest.TestCase): def setUp(self): self.example_structure = bulk("Fe") - def assertAlmostEqualSeries(self, series, expected_series, decimal=4): - for index, (actual, expected) in enumerate(zip(series, expected_series)): - self.assertAlmostEqual(actual, expected, places=decimal, - msg=f"Failed at index {index}: {actual} != {expected}") + def assertListsAlmostEqual(self, list1, list2, decimal=4): + """ + Check if two lists are approximately equal up to a specified number of decimal places. + + Parameters: + list1 (list): The first list for comparison. + list2 (list): The second list for comparison. + decimal (int): The number of decimal places to consider for comparison. + + Raises: + AssertionError: Raised if the lists are not approximately equal. + """ + self.assertEqual(len(list1), len(list2), "Lists have different lengths") + + for i in range(len(list1)): + self.assertAlmostEqual(list1[i], list2[i], places=decimal, + msg=f"Lists differ at index {i}: {list1[i]} != {list2[i]}") def test_VoronoiSiteFeaturiser(self): # Calculate the expected output manually @@ -235,7 +247,4 @@ def test_VoronoiSiteFeaturiser(self): # Call the function with the example structure df = VoronoiSiteFeaturiser(self.example_structure, 0) - - # Check that the DataFrame matches the expected output with the specified tolerance - for column, expected_value in expected_output.items(): - self.assertAlmostEqualSeries(df[column], expected_value, decimal=4) + self.assertListsAlmostEqual(df.values.tolist()[0], list(expected_output.values()), decimal=4) From 50e25673ca9b41aa8c44e72b6d8bbb670352ce7b Mon Sep 17 00:00:00 2001 From: Han Lin Mai Date: Mon, 18 Sep 2023 20:35:20 +0200 Subject: [PATCH 10/12] multiple exception parenthesis --- tests/test_pymatgen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_pymatgen.py b/tests/test_pymatgen.py index 6ab6076a8..746308d68 100644 --- a/tests/test_pymatgen.py +++ b/tests/test_pymatgen.py @@ -8,7 +8,7 @@ from pymatgen.core import Structure, Lattice from structuretoolkit.analyse.pymatgen import VoronoiSiteFeaturiser skip_pymatgen_test = False -except ImportError, ModuleNotFoundError: +except (ImportError, ModuleNotFoundError): skip_pymatgen_test = True From baf64f35af86e8894beee32a05148e5a75469b85 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 19 Sep 2023 17:31:07 +0200 Subject: [PATCH 11/12] black formatting --- structuretoolkit/analyse/pymatgen.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/structuretoolkit/analyse/pymatgen.py b/structuretoolkit/analyse/pymatgen.py index b24e3ebcc..41655485b 100644 --- a/structuretoolkit/analyse/pymatgen.py +++ b/structuretoolkit/analyse/pymatgen.py @@ -4,6 +4,7 @@ import numpy as np import pandas as pd + def get_stats(property_list, property_str): """ Calculate statistical properties of a list of values. @@ -26,9 +27,10 @@ def get_stats(property_list, property_str): f"{property_str}_std": np.std(property_list), f"{property_str}_mean": np.mean(property_list), f"{property_str}_min": np.min(property_list), - f"{property_str}_max": np.max(property_list) + f"{property_str}_max": np.max(property_list), } + def VoronoiSiteFeaturiser(structure, site): """ Calculate various Voronoi-related features for a specific site in a crystal structure. @@ -60,7 +62,7 @@ def VoronoiSiteFeaturiser(structure, site): data = { "VorNN_CoordNo": coord_no, "VorNN_tot_vol": total_volume, - "VorNN_tot_area": total_area + "VorNN_tot_area": total_area, } data_str_list = ["volumes", "vertices", "areas", "distances"] @@ -70,4 +72,4 @@ def VoronoiSiteFeaturiser(structure, site): data.update(stats) df = pd.DataFrame(data, index=[site]) - return df \ No newline at end of file + return df From d476a31059e144fc81fbca764efc44581250a6a1 Mon Sep 17 00:00:00 2001 From: pyiron-runner Date: Wed, 14 Feb 2024 13:37:15 +0000 Subject: [PATCH 12/12] Format black --- structuretoolkit/analyse/strain.py | 1 - structuretoolkit/analyse/symmetry.py | 1 - 2 files changed, 2 deletions(-) diff --git a/structuretoolkit/analyse/strain.py b/structuretoolkit/analyse/strain.py index 3632c9dda..f9ebb6aaa 100644 --- a/structuretoolkit/analyse/strain.py +++ b/structuretoolkit/analyse/strain.py @@ -6,7 +6,6 @@ class Strain: - """ Calculate local strain of each atom following the Lagrangian strain tensor: diff --git a/structuretoolkit/analyse/symmetry.py b/structuretoolkit/analyse/symmetry.py index bebeeae37..2da660fee 100644 --- a/structuretoolkit/analyse/symmetry.py +++ b/structuretoolkit/analyse/symmetry.py @@ -24,7 +24,6 @@ class Symmetry(dict): - """ Return a class for operations related to box symmetries. Main attributes: