From 283ae6435163b78d183ee9c7aa6854a287927dc5 Mon Sep 17 00:00:00 2001 From: w-bonelli Date: Mon, 8 Apr 2024 15:17:21 -0400 Subject: [PATCH] refactor(particle tracking output): consolidation/cleanup, support prt files --- autotest/test_plotutil.py | 182 +++++--- flopy/plot/plotutil.py | 697 ++++++++++++++++++------------- flopy/utils/modpathfile.py | 90 ++-- flopy/utils/particletrackfile.py | 16 +- flopy/utils/prtfile.py | 166 ++++++++ 5 files changed, 734 insertions(+), 417 deletions(-) create mode 100644 flopy/utils/prtfile.py diff --git a/autotest/test_plotutil.py b/autotest/test_plotutil.py index 8aa228f8aa..bc5fbb05ad 100644 --- a/autotest/test_plotutil.py +++ b/autotest/test_plotutil.py @@ -3,13 +3,17 @@ import pytest from flopy.plot.plotutil import ( - MP7_ENDPOINT_DTYPE, - MP7_PATHLINE_DTYPE, - PRT_PATHLINE_DTYPE, to_mp7_endpoints, to_mp7_pathlines, to_prt_pathlines, ) +from flopy.utils.modpathfile import ( + EndpointFile as MpEndpointFile, +) +from flopy.utils.modpathfile import ( + PathlineFile as MpPathlineFile, +) +from flopy.utils.prtfile import PathlineFile as PrtPathlineFile PRT_TEST_PATHLINES = pd.DataFrame.from_records( [ @@ -194,7 +198,7 @@ "PRP000000001", # name ], ], - columns=PRT_PATHLINE_DTYPE.names, + columns=PrtPathlineFile.dtypes["base"].names, ) MP7_TEST_PATHLINES = pd.DataFrame.from_records( [ @@ -233,7 +237,7 @@ 1, # timestep ], ], - columns=MP7_PATHLINE_DTYPE.names, + columns=MpPathlineFile.dtypes[7].names, ) MP7_TEST_ENDPOINTS = pd.DataFrame.from_records( [ @@ -322,114 +326,152 @@ 2, # cellface ], ], - columns=MP7_ENDPOINT_DTYPE.names, + columns=MpEndpointFile.dtypes[7].names, ) @pytest.mark.parametrize("dataframe", [True, False]) -def test_to_mp7_pathlines(dataframe): - prt_pls = ( - PRT_TEST_PATHLINES - if dataframe - else PRT_TEST_PATHLINES.to_records(index=False) - ) - mp7_pls = to_mp7_pathlines(prt_pls) +@pytest.mark.parametrize("source", ["prt"]) # , "mp3", "mp5", "mp6"]) +def test_to_mp7_pathlines(dataframe, source): + if source == "prt": + pls = ( + PRT_TEST_PATHLINES + if dataframe + else PRT_TEST_PATHLINES.to_records(index=False) + ) + elif source == "mp3": + pass + elif source == "mp5": + pass + elif source == "mp7": + pass + mp7_pls = to_mp7_pathlines(pls) assert ( - type(prt_pls) + type(pls) == type(mp7_pls) == (pd.DataFrame if dataframe else np.recarray) ) assert len(mp7_pls) == 10 assert set( dict(mp7_pls.dtypes).keys() if dataframe else mp7_pls.dtype.names - ) == set(MP7_PATHLINE_DTYPE.names) + ) == set(MpPathlineFile.dtypes[7].names) @pytest.mark.parametrize("dataframe", [True, False]) -def test_to_mp7_pathlines_empty(dataframe): - mp7_pls = to_mp7_pathlines( - pd.DataFrame.from_records([], columns=PRT_PATHLINE_DTYPE.names) - if dataframe - else np.recarray((0,), dtype=PRT_PATHLINE_DTYPE) - ) - assert mp7_pls.empty if dataframe else mp7_pls.size == 0 +@pytest.mark.parametrize("source", ["prt"]) # , "mp3", "mp5", "mp6"]) +def test_to_mp7_pathlines_empty(dataframe, source): + if source == "prt": + pls = to_mp7_pathlines( + pd.DataFrame.from_records( + [], columns=PrtPathlineFile.dtypes["base"].names + ) + if dataframe + else np.recarray((0,), dtype=PrtPathlineFile.dtypes["base"]) + ) + elif source == "mp3": + pass + elif source == "mp5": + pass + elif source == "mp7": + pass + assert pls.empty if dataframe else pls.size == 0 if dataframe: - mp7_pls = mp7_pls.to_records(index=False) - assert mp7_pls.dtype == MP7_PATHLINE_DTYPE + pls = pls.to_records(index=False) + assert pls.dtype == MpPathlineFile.dtypes[7] @pytest.mark.parametrize("dataframe", [True, False]) def test_to_mp7_pathlines_noop(dataframe): - prt_pls = ( + pls = ( MP7_TEST_PATHLINES if dataframe else MP7_TEST_PATHLINES.to_records(index=False) ) - mp7_pls = to_mp7_pathlines(prt_pls) + mp7_pls = to_mp7_pathlines(pls) assert ( - type(prt_pls) + type(pls) == type(mp7_pls) == (pd.DataFrame if dataframe else np.recarray) ) assert len(mp7_pls) == 2 assert set( dict(mp7_pls.dtypes).keys() if dataframe else mp7_pls.dtype.names - ) == set(MP7_PATHLINE_DTYPE.names) + ) == set(MpPathlineFile.dtypes[7].names) assert np.array_equal( mp7_pls if dataframe else pd.DataFrame(mp7_pls), MP7_TEST_PATHLINES ) @pytest.mark.parametrize("dataframe", [True, False]) -def test_to_mp7_endpoints(dataframe): - mp7_eps = to_mp7_endpoints( - PRT_TEST_PATHLINES - if dataframe - else PRT_TEST_PATHLINES.to_records(index=False) - ) - assert len(mp7_eps) == 1 - assert np.isclose(mp7_eps.time[0], PRT_TEST_PATHLINES.t.max()) +@pytest.mark.parametrize("source", ["prt"]) # , "mp3", "mp5", "mp6"]) +def test_to_mp7_endpoints(dataframe, source): + if source == "prt": + eps = to_mp7_endpoints( + PRT_TEST_PATHLINES + if dataframe + else PRT_TEST_PATHLINES.to_records(index=False) + ) + elif source == "mp3": + pass + elif source == "mp5": + pass + elif source == "mp6": + pass + assert len(eps) == 1 + assert np.isclose(eps.time[0], PRT_TEST_PATHLINES.t.max()) assert set( - dict(mp7_eps.dtypes).keys() if dataframe else mp7_eps.dtype.names - ) == set(MP7_ENDPOINT_DTYPE.names) + dict(eps.dtypes).keys() if dataframe else eps.dtype.names + ) == set(MpEndpointFile.dtypes[7].names) @pytest.mark.parametrize("dataframe", [True, False]) -def test_to_mp7_endpoints_empty(dataframe): - mp7_eps = to_mp7_endpoints( - pd.DataFrame.from_records([], columns=PRT_PATHLINE_DTYPE.names) +@pytest.mark.parametrize("source", ["prt"]) # , "mp3", "mp5", "mp6"]) +def test_to_mp7_endpoints_empty(dataframe, source): + eps = to_mp7_endpoints( + pd.DataFrame.from_records( + [], columns=PrtPathlineFile.dtypes["base"].names + ) if dataframe - else np.recarray((0,), dtype=PRT_PATHLINE_DTYPE) + else np.recarray((0,), dtype=PrtPathlineFile.dtypes["base"]) ) - assert mp7_eps.empty if dataframe else mp7_eps.size == 0 + assert eps.empty if dataframe else eps.size == 0 if dataframe: - mp7_eps = mp7_eps.to_records(index=False) - assert mp7_eps.dtype == MP7_ENDPOINT_DTYPE + eps = eps.to_records(index=False) + assert eps.dtype == MpEndpointFile.dtypes[7] @pytest.mark.parametrize("dataframe", [True, False]) def test_to_mp7_endpoints_noop(dataframe): """Test a recarray or dataframe which already contains MP7 endpoint data""" - mp7_eps = to_mp7_endpoints( + eps = to_mp7_endpoints( MP7_TEST_ENDPOINTS if dataframe else MP7_TEST_ENDPOINTS.to_records(index=False) ) assert np.array_equal( - mp7_eps if dataframe else pd.DataFrame(mp7_eps), MP7_TEST_ENDPOINTS + eps if dataframe else pd.DataFrame(eps), MP7_TEST_ENDPOINTS ) @pytest.mark.parametrize("dataframe", [True, False]) -def test_to_prt_pathlines_roundtrip(dataframe): - mp7_pls = to_mp7_pathlines( - PRT_TEST_PATHLINES - if dataframe - else PRT_TEST_PATHLINES.to_records(index=False) - ) - prt_pls = to_prt_pathlines(mp7_pls) +@pytest.mark.parametrize("source", ["prt"]) # , "mp3", "mp5", "mp6"]) +def test_to_prt_pathlines_roundtrip(dataframe, source): + if source == "prt": + pls = to_mp7_pathlines( + PRT_TEST_PATHLINES + if dataframe + else PRT_TEST_PATHLINES.to_records(index=False) + ) + elif source == "mp3": + pass + elif source == "mp5": + pass + elif source == "mp6": + pass + prt_pls = to_prt_pathlines(pls) if not dataframe: prt_pls = pd.DataFrame(prt_pls) + # import pdb; pdb.set_trace() assert np.allclose( PRT_TEST_PATHLINES.drop( ["imdl", "iprp", "irpt", "name", "istatus", "ireason"], @@ -443,15 +485,25 @@ def test_to_prt_pathlines_roundtrip(dataframe): @pytest.mark.parametrize("dataframe", [True, False]) -def test_to_prt_pathlines_roundtrip_empty(dataframe): - mp7_pls = to_mp7_pathlines( - pd.DataFrame.from_records([], columns=PRT_PATHLINE_DTYPE.names) - if dataframe - else np.recarray((0,), dtype=PRT_PATHLINE_DTYPE) - ) - prt_pls = to_prt_pathlines(mp7_pls) - assert mp7_pls.empty if dataframe else mp7_pls.size == 0 - assert prt_pls.empty if dataframe else mp7_pls.size == 0 +@pytest.mark.parametrize("source", ["prt"]) # , "mp3", "mp5", "mp6"]) +def test_to_prt_pathlines_roundtrip_empty(dataframe, source): + if source == "prt": + pls = to_mp7_pathlines( + pd.DataFrame.from_records( + [], columns=PrtPathlineFile.dtypes["base"].names + ) + if dataframe + else np.recarray((0,), dtype=PrtPathlineFile.dtypes["base"]) + ) + elif source == "mp3": + pass + elif source == "mp5": + pass + elif source == "mp6": + pass + prt_pls = to_prt_pathlines(pls) + assert pls.empty if dataframe else pls.size == 0 + assert prt_pls.empty if dataframe else pls.size == 0 assert set( - dict(mp7_pls.dtypes).keys() if dataframe else mp7_pls.dtype.names - ) == set(MP7_PATHLINE_DTYPE.names) + dict(pls.dtypes).keys() if dataframe else pls.dtype.names + ) == set(MpPathlineFile.dtypes[7].names) diff --git a/flopy/plot/plotutil.py b/flopy/plot/plotutil.py index 57af252a4f..320c1ae767 100644 --- a/flopy/plot/plotutil.py +++ b/flopy/plot/plotutil.py @@ -7,14 +7,15 @@ import os import warnings -from itertools import repeat -from typing import Union +from pprint import pformat +from typing import List, Optional, Tuple, Union import matplotlib.pyplot as plt import numpy as np import pandas as pd from ..datbase import DataInterface, DataType +from ..discretization.grid import Grid from ..utils import Util3d, import_optional_dependency warnings.simplefilter("ignore", RuntimeWarning) @@ -2600,369 +2601,465 @@ def parse_modpath_selection_options( return tep, istart, xp, yp -PRT_PATHLINE_DTYPE = np.dtype( - [ - ("kper", np.int32), - ("kstp", np.int32), - ("imdl", np.int32), - ("iprp", np.int32), - ("irpt", np.int32), - ("ilay", np.int32), - ("icell", np.int32), - ("izone", np.int32), - ("istatus", np.int32), - ("ireason", np.int32), - ("trelease", np.float32), - ("t", np.float32), - ("x", np.float32), - ("y", np.float32), - ("z", np.float32), - ("name", np.str_), - ] -) -MP7_PATHLINE_DTYPE = np.dtype( - [ - ("particleid", np.int32), # same as sequencenumber - ("particlegroup", np.int32), - ( - "sequencenumber", - np.int32, - ), # mp7 sequencenumber (globally unique auto-generated ID) - ( - "particleidloc", - np.int32, - ), # mp7 particle ID (unique within a group, user-assigned or autogenerated) - ("time", np.float32), - ("x", np.float32), - ("y", np.float32), - ("z", np.float32), - ("k", np.int32), - ("node", np.int32), - ("xloc", np.float32), - ("yloc", np.float32), - ("zloc", np.float32), - ("stressperiod", np.int32), - ("timestep", np.int32), - ] -) -MP7_ENDPOINT_DTYPE = np.dtype( - [ - ( - "particleid", - np.int32, - ), # mp7 sequencenumber (globally unique auto-generated ID) - ("particlegroup", np.int32), - ( - "particleidloc", - np.int32, - ), # mp7 particle ID (unique within a group, user-assigned or autogenerated) - ("status", np.int32), - ("time0", np.float32), - ("time", np.float32), - ("node0", np.int32), - ("k0", np.int32), - ("xloc0", np.float32), - ("yloc0", np.float32), - ("zloc0", np.float32), - ("x0", np.float32), - ("y0", np.float32), - ("z0", np.float32), - ("zone0", np.int32), - ("initialcellface", np.int32), - ("node", np.int32), - ("k", np.int32), - ("xloc", np.float32), - ("yloc", np.float32), - ("zloc", np.float32), - ("x", np.float32), - ("y", np.float32), - ("z", np.float32), - ("zone", np.int32), - ("cellface", np.int32), - ] -) - - def to_mp7_pathlines( data: Union[np.recarray, pd.DataFrame], + grid: Optional[Grid] = None, + tdis: Optional[List[Tuple[int, int, float]]] = None, ) -> Union[np.recarray, pd.DataFrame]: """ - Convert MODFLOW 6 PRT pathline data to MODPATH 7 pathline format. + Convert pathline data to MODPATH 7 pathline format. Pathline + input ``data`` is expected in MODPATH 3, 5, 6, 7, or MODFLOW + 6 PRT format. + + Notes + ----- + This function is idempotent. Parameters ---------- data : np.recarray or pd.DataFrame - MODFLOW 6 PRT pathline data + Tabular pathline data, in MODPATH or MODFLOW 6 PRT format. + grid : flopy.discretization.grid.Grid + Optional grid discretization to compute local coordinates, + if not provided in the input data. + tdis : list of tuples (int, int, float) + Optional time discretization to compute stress period and + time step, if not provided in the input data. Returns ------- np.recarray or pd.DataFrame (consistent with input type) """ - from flopy.utils.particletrackfile import MIN_PARTICLE_TRACK_DTYPE - - # determine return type - ret_type = type(data) + from flopy.utils.modpathfile import PathlineFile as MpPathlineFile + from flopy.utils.particletrackfile import ParticleTrackFile + from flopy.utils.prtfile import PathlineFile as PrtPathlineFile - # convert to dataframe if needed - if not isinstance(data, pd.DataFrame): + rt = type(data) # return type will match input type + if data.size == 0: # return early if empty + data = np.recarray((0,), dtype=MpPathlineFile.dtypes[7]) + return pd.DataFrame(data) if rt == pd.DataFrame else data + if not isinstance(data, pd.DataFrame): # convert to dataframe if needed data = pd.DataFrame(data) - # check format - dt = data.dtypes - if not ( - all(n in dt for n in MIN_PARTICLE_TRACK_DTYPE.names) - or all(n in dt for n in PRT_PATHLINE_DTYPE.names) + # do the conversion if we recognize the input dtype, otherwise raise an error + if all( + n in data.dtypes for n in MpPathlineFile.dtypes[7].names + ): # already in MP7 format? + return data if rt == pd.DataFrame else data.to_records(index=False) + elif all(n in data.dtypes for n in MpPathlineFile.dtypes[6].names): + # todo stressperiod/timestep, from tdis + # nper = len(tdis) + data = np.core.records.fromarrays( + [ + data["particleid"], + data["particlegroup"], + np.zeros(data.shape[0]), + np.zeros(data.shape[0]), + data["time"], + data["x"], + data["y"], + data["z"], + data["k"], + np.zeros(data.shape[0]) + if grid is None + else data["k"] * grid.nrow * grid.ncol + + data["i"] * grid.ncol + + data["j"], + data["xloc"], + data["yloc"], + data["zloc"], + # todo stressperiod/timestep, from tdis + np.zeros(data.shape[0]), + np.zeros(data.shape[0]), + ], + dtype=MpPathlineFile.dtypes[7], + ) + elif all(n in data.dtypes for n in MpPathlineFile.dtypes[5].names) or all( + n in data.dtypes for n in MpPathlineFile.dtypes[3].names ): - raise ValueError( - "Pathline data must contain the following fields: " - f"{MIN_PARTICLE_TRACK_DTYPE.names} for MODPATH 7, or " - f"{PRT_PATHLINE_DTYPE.names} for MODFLOW 6 PRT" + # todo stressperiod/timestep, from tdis + # nper = len(tdis) + data = np.core.records.fromarrays( + [ + data["particleid"], + np.zeros(data.shape[0]), + np.zeros(data.shape[0]), + np.zeros(data.shape[0]), + data["time"], + data["x"], + data["y"], + data["z"], + data["k"], + np.zeros(data.shape[0]) + if grid is None + else data["k"] * grid.nrow * grid.ncol + + data["i"] * grid.ncol + + data["j"], + np.zeros(data.shape[0]), + np.zeros(data.shape[0]), + data["zloc"], + # todo stressperiod/timestep, from tdis + np.zeros(data.shape[0]), + np.zeros(data.shape[0]), + ], + dtype=MpPathlineFile.dtypes[7], ) - - # return early if already in MP7 format - if "t" not in dt: - return ( - data if ret_type == pd.DataFrame else data.to_records(index=False) + elif all(n in data.dtypes for n in PrtPathlineFile.dtypes["base"].names): + # assign a unique particle index column, incrementing an integer + # for each unique combination of irpt, iprp, imdl, and trelease. + data = data.sort_values(PrtPathlineFile.key_cols) + data["sequencenumber"] = data.groupby( + PrtPathlineFile.key_cols + ).ngroup() + data = np.core.records.fromarrays( + [ + data["sequencenumber"], + data["iprp"], + data["sequencenumber"], + data["irpt"], + data["t"], + data["x"], + data["y"], + data["z"], + data["ilay"], + data["icell"], + # todo local coords (xloc, yloc, zloc) + np.zeros(data.shape[0]), + np.zeros(data.shape[0]), + np.zeros(data.shape[0]), + data["kper"], + data["kstp"], + ], + dtype=MpPathlineFile.dtypes[7], + ) + elif all(n in data.dtypes for n in ParticleTrackFile.dtype.names): + data = np.core.records.fromarrays( + [ + data["particleid"], + np.zeros(data.shape[0]), + np.zeros(data.shape[0]), + np.zeros(data.shape[0]), + data["time"], + data["x"], + data["y"], + data["z"], + data["k"], + np.zeros(data.shape[0]), + np.zeros(data.shape[0]), + np.zeros(data.shape[0]), + np.zeros(data.shape[0]), + np.zeros(data.shape[0]), + np.zeros(data.shape[0]), + ], + dtype=MpPathlineFile.dtypes[7], + ) + else: + raise ValueError( + "Unrecognized dtype, expected:\n" + f"{pformat(MpPathlineFile.dtypes[3].names)} for MODPATH 3, or;\n" + f"{pformat(MpPathlineFile.dtypes[5].names)} for MODPATH 5, or;\n" + f"{pformat(MpPathlineFile.dtypes[6].names)} for MODPATH 6, or;\n" + f"{pformat(MpPathlineFile.dtypes[7].names)} for MODPATH 7, or;\n" + f"{pformat(PrtPathlineFile.dtypes['base'].names)} for MODFLOW 6 PRT, or;\n" + f"{pformat(ParticleTrackFile.dtype.names)} (minimal expected dtype names)" ) - # return early if empty - if data.empty: - ret = np.recarray((0,), dtype=MP7_PATHLINE_DTYPE) - return pd.DataFrame(ret) if ret_type == pd.DataFrame else ret - - # assign a unique particle index column incrementing an integer - # for each unique combination of irpt, iprp, imdl, and trelease - data = data.sort_values(["imdl", "iprp", "irpt", "trelease"]) - particles = data.groupby(["imdl", "iprp", "irpt", "trelease"]) - seqn_key = "sequencenumber" - data[seqn_key] = particles.ngroup() - - # convert to recarray - data = data.to_records(index=False) - - # build mp7 format recarray - ret = np.core.records.fromarrays( - [ - data[seqn_key], - data["iprp"], - data[seqn_key], - data["irpt"], - data["t"], - data["x"], - data["y"], - data["z"], - data["ilay"], - data["icell"], - # todo local coords (xloc, yloc, zloc) - np.zeros(data.shape[0]), - np.zeros(data.shape[0]), - np.zeros(data.shape[0]), - data["kper"], - data["kstp"], - ], - dtype=MP7_PATHLINE_DTYPE, - ) - - return pd.DataFrame(ret) if ret_type == pd.DataFrame else ret + return pd.DataFrame(data) if rt == pd.DataFrame else data def to_mp7_endpoints( data: Union[np.recarray, pd.DataFrame], + grid: Optional[Grid] = None, + tdis: Optional[List[Tuple[int, int, float]]] = None, ) -> Union[np.recarray, pd.DataFrame]: """ - Convert MODFLOW 6 PRT pathline data to MODPATH 7 endpoint format. + Convert pathline or endpoint data to MODPATH 7 endpoint format. + + Notes + ----- + This function is idempotent. Parameters ---------- data : np.recarray or pd.DataFrame - MODFLOW 6 PRT pathline data + Tabular pathline or endpoint data, in MODPATH or MODFLOW 6 PRT format. + grid : flopy.discretization.grid.Grid + Optional grid discretization to compute local coordinates, + if not provided in the input data. + tdis : list of tuples (int, int, float) + Optional time discretization to compute stress period and + time step, if not provided in the input data. Returns ------- np.recarray or pd.DataFrame (consistent with input type) """ - from flopy.utils.particletrackfile import MIN_PARTICLE_TRACK_DTYPE + from flopy.utils.modpathfile import EndpointFile as MpEndpointFile + from flopy.utils.particletrackfile import ParticleTrackFile + from flopy.utils.prtfile import PathlineFile as PrtPathlineFile - # determine return type - ret_type = type(data) - - # convert to dataframe if needed - if isinstance(data, np.recarray): + rt = type(data) # return type will match input type + if data.size == 0: # return early if empty + data = np.recarray((0,), dtype=MpEndpointFile.dtypes[7]) + return pd.DataFrame(data) if rt == pd.DataFrame else data + if not isinstance(data, pd.DataFrame): # convert to dataframe if needed data = pd.DataFrame(data) - # check format - dt = data.dtypes - if all(n in dt for n in MP7_ENDPOINT_DTYPE.names): - return ( - data if ret_type == pd.DataFrame else data.to_records(index=False) + # do the conversion if we recognize the input dtype, otherwise raise an error + if all( + n in data.dtypes for n in MpEndpointFile.dtypes[7].names + ): # already in MP7 format? + return data if rt == pd.DataFrame else data.to_records(index=False) + elif all(n in data.dtypes for n in MpEndpointFile.dtypes[3].names): + raise NotImplementedError("MP3 endpoint files not yet supported") + elif all(n in data.dtypes for n in MpEndpointFile.dtypes[5].names): + raise NotImplementedError("MP5 endpoint files not yet supported") + elif all(n in data.dtypes for n in MpEndpointFile.dtypes[6].names): + raise NotImplementedError("MP6 endpoint files not yet supported") + elif all(n in data.dtypes for n in PrtPathlineFile.dtypes["base"].names): + # assign a unique particle index column incrementing an integer + # for each unique combination of irpt, iprp, imdl, and trelease + data = data.sort_values(["imdl", "iprp", "irpt", "trelease"]) + particles = data.groupby(["imdl", "iprp", "irpt", "trelease"]) + seqn_key = "sequencenumber" + data[seqn_key] = particles.ngroup() + + # select startpoints and endpoints, sorting by sequencenumber + startpts = ( + data.sort_values("t") + .groupby(seqn_key) + .head(1) + .sort_values(seqn_key) ) - if not ( - all(n in dt for n in MIN_PARTICLE_TRACK_DTYPE.names) - or all(n in dt for n in PRT_PATHLINE_DTYPE.names) - ): - raise ValueError( - "Pathline data must contain the following fields: " - f"{MIN_PARTICLE_TRACK_DTYPE.names} for MODPATH 7, or " - f"{PRT_PATHLINE_DTYPE.names} for MODFLOW 6 PRT" + endpts = ( + data.sort_values("t") + .groupby(seqn_key) + .tail(1) + .sort_values(seqn_key) ) - # return early if empty - if data.empty: - ret = np.recarray((0,), dtype=MP7_ENDPOINT_DTYPE) - return pd.DataFrame(ret) if ret_type == pd.DataFrame else ret - - # assign a unique particle index column incrementing an integer - # for each unique combination of irpt, iprp, imdl, and trelease - data = data.sort_values(["imdl", "iprp", "irpt", "trelease"]) - particles = data.groupby(["imdl", "iprp", "irpt", "trelease"]) - seqn_key = "sequencenumber" - data[seqn_key] = particles.ngroup() - - # select startpoints and endpoints, sorting by sequencenumber - startpts = ( - data.sort_values("t").groupby(seqn_key).head(1).sort_values(seqn_key) - ) - endpts = ( - data.sort_values("t").groupby(seqn_key).tail(1).sort_values(seqn_key) - ) + # add columns + pairings = [ + # initial coordinates + ("x0", "x"), + ("y0", "y"), + ("z0", "z"), + # initial zone + ("zone0", "izone"), + # initial node number + ("node0", "icell"), + # initial layer + ("k0", "ilay"), + ] + conditions = [ + startpts[seqn_key].eq(row[seqn_key]) + for _, row in startpts.iterrows() + ] + for fl, fr in pairings: + endpts[fl] = np.select(conditions, startpts[fr].to_numpy()) - # add columns for - pairings = [ - # initial coordinates - ("x0", "x"), - ("y0", "y"), - ("z0", "z"), - # initial zone - ("zone0", "izone"), - # initial node number - ("node0", "icell"), - # initial layer - ("k0", "ilay"), - ] - conditions = [ - startpts[seqn_key].eq(row[seqn_key]) for _, row in startpts.iterrows() - ] - for fl, fr in pairings: - endpts[fl] = np.select(conditions, startpts[fr].to_numpy()) - - # convert to recarray - endpts = endpts.to_records(index=False) - - # build mp7 format recarray - ret = np.core.records.fromarrays( - [ - endpts["sequencenumber"], - endpts["iprp"], - endpts["irpt"], - endpts["istatus"], - endpts["trelease"], - endpts["t"], - endpts["node0"], - endpts["k0"], - # todo initial local coords (xloc0, yloc0, zloc0) - np.zeros(endpts.shape[0]), - np.zeros(endpts.shape[0]), - np.zeros(endpts.shape[0]), - endpts["x0"], - endpts["y0"], - endpts["z0"], - endpts["zone0"], - np.zeros(endpts.shape[0]), # todo initial cell face? - endpts["icell"], - endpts["ilay"], - # todo local coords (xloc, yloc, zloc) - np.zeros(endpts.shape[0]), - np.zeros(endpts.shape[0]), - np.zeros(endpts.shape[0]), - endpts["x"], - endpts["y"], - endpts["z"], - endpts["izone"], - np.zeros(endpts.shape[0]), # todo cell face? - ], - dtype=MP7_ENDPOINT_DTYPE, - ) + data = np.core.records.fromarrays( + [ + endpts["sequencenumber"], + endpts["iprp"], + endpts["irpt"], + endpts["istatus"], + endpts["trelease"], + endpts["t"], + endpts["node0"], + endpts["k0"], + # todo initial local coords (xloc0, yloc0, zloc0) + np.zeros(endpts.shape[0]), + np.zeros(endpts.shape[0]), + np.zeros(endpts.shape[0]), + endpts["x0"], + endpts["y0"], + endpts["z0"], + endpts["zone0"], + np.zeros(endpts.shape[0]), # todo initial cell face? + endpts["icell"], + endpts["ilay"], + # todo local coords (xloc, yloc, zloc) + np.zeros(endpts.shape[0]), + np.zeros(endpts.shape[0]), + np.zeros(endpts.shape[0]), + endpts["x"], + endpts["y"], + endpts["z"], + endpts["izone"], + np.zeros(endpts.shape[0]), # todo cell face? + ], + dtype=MpEndpointFile.dtypes[7], + ) + elif all(n in data.dtypes for n in ParticleTrackFile.dtype.names): + data = np.core.records.fromarrays( + [ + endpts["particleid"], + np.zeros(endpts.shape[0]), + np.zeros(endpts.shape[0]), + np.zeros(endpts.shape[0]), + np.zeros(endpts.shape[0]), + endpts["time"], + np.zeros(endpts.shape[0]), + np.zeros(endpts.shape[0]), + # todo initial local coords (xloc0, yloc0, zloc0) + np.zeros(endpts.shape[0]), + np.zeros(endpts.shape[0]), + np.zeros(endpts.shape[0]), + np.zeros(endpts.shape[0]), + np.zeros(endpts.shape[0]), + np.zeros(endpts.shape[0]), + np.zeros(endpts.shape[0]), + np.zeros(endpts.shape[0]), # todo initial cell face? + endpts["k"], + np.zeros(endpts.shape[0]), + np.zeros(endpts.shape[0]), + np.zeros(endpts.shape[0]), + endpts["x"], + endpts["y"], + endpts["z"], + np.zeros(endpts.shape[0]), + np.zeros(endpts.shape[0]), + ], + dtype=MpEndpointFile.dtypes[7], + ) + else: + raise ValueError( + "Unrecognized dtype, expected:\n" + f"{pformat(MpEndpointFile.dtypes[3].names)} for MODPATH 3, or;\n" + f"{pformat(MpEndpointFile.dtypes[5].names)} for MODPATH 5, or;\n" + f"{pformat(MpEndpointFile.dtypes[6].names)} for MODPATH 6, or;\n" + f"{pformat(MpEndpointFile.dtypes[7].names)} for MODPATH 7, or;\n" + f"{pformat(PrtPathlineFile.dtypes['base'].names)} for MODFLOW 6 PRT, or;\n" + f"{pformat(ParticleTrackFile.dtype.names)} (minimal expected dtype names)" + ) - return pd.DataFrame(ret) if ret_type == pd.DataFrame else ret + return pd.DataFrame(data) if rt == pd.DataFrame else data def to_prt_pathlines( data: Union[np.recarray, pd.DataFrame], ) -> Union[np.recarray, pd.DataFrame]: """ - Convert MODPATH 7 pathline or endpoint data to MODFLOW 6 PRT pathline format. + Convert MODPATH pathline or endpoint data to MODFLOW 6 PRT pathline format. + + todo: support mp timeseries input Parameters ---------- data : np.recarray or pd.DataFrame - MODPATH 7 pathline or endpoint data + MODPATH pathline or endpoint data Returns ------- np.recarray or pd.DataFrame (consistent with input type) """ - # determine return type - ret_type = type(data) - - # convert to dataframe if needed - if isinstance(data, np.recarray): + from flopy.utils.modpathfile import ( + EndpointFile as MpEndpointFile, + ) + from flopy.utils.modpathfile import ( + PathlineFile as MpPathlineFile, + ) + from flopy.utils.particletrackfile import ParticleTrackFile + from flopy.utils.prtfile import PathlineFile as PrtPathlineFile + + rt = type(data) # return type will match input type + if data.size == 0: # return early if empty + data = np.recarray((0,), dtype=PrtPathlineFile.dtypes["base"]) + return pd.DataFrame(data) if rt == pd.DataFrame else data + if not isinstance(data, pd.DataFrame): # convert to dataframe if needed data = pd.DataFrame(data) - # check format - dt = data.dtypes - if not ( - all(n in dt for n in MP7_PATHLINE_DTYPE.names) - or all(n in dt for n in PRT_PATHLINE_DTYPE.names) + # do the conversion if we recognize the input dtype, otherwise raise an error + if all(n in data.dtypes for n in MpPathlineFile.dtypes[7].names) or all( + n in data.dtypes for n in MpEndpointFile.dtypes[7].names ): - raise ValueError( - "Pathline data must contain the following fields: " - f"{MP7_PATHLINE_DTYPE.names} for MODPATH 7, or " - f"{PRT_PATHLINE_DTYPE.names} for MODFLOW 6 PRT" + data = np.core.records.fromarrays( + [ + data["stressperiod"], + data["timestep"], + np.zeros(data.shape[0]), + data["particlegroup"], + data["sequencenumber"], + data["k"], + data["node"], + np.zeros(data.shape[0]), # todo izone? + np.zeros(data.shape[0]), # todo istatus? + np.zeros(data.shape[0]), # todo ireason? + np.zeros(data.shape[0]), # todo trelease? + data["time"], + data["x"], + data["y"], + data["z"], + ["" for _ in range(data.shape[0])], + ], + dtype=PrtPathlineFile.dtypes["base"], ) - - # return early if already in PRT format - if "t" in dt: - return ( - data if ret_type == pd.DataFrame else data.to_records(index=False) + elif all(n in data.dtypes for n in MpPathlineFile.dtypes[3].names) or all( + n in data.dtypes for n in MpEndpointFile.dtypes[3].names + ): + raise NotImplementedError( + "MP3 pathline and endpoint files not yet supported" + ) + elif all(n in data.dtypes for n in MpPathlineFile.dtypes[5].names) or all( + n in data.dtypes for n in MpEndpointFile.dtypes[5].names + ): + raise NotImplementedError( + "MP5 pathline and endpoint files not yet supported" + ) + elif all(n in data.dtypes for n in MpPathlineFile.dtypes[6].names) or all( + n in data.dtypes for n in MpEndpointFile.dtypes[6].names + ): + raise NotImplementedError( + "MP6 pathline and endpoint files not yet supported" + ) + elif all( + n in data.dtypes for n in PrtPathlineFile.dtypes["base"].names + ): # already in prt format? + return data if rt == pd.DataFrame else data.to_records(index=False) + elif all(n in data.dtypes for n in ParticleTrackFile.dtype.names): + data = np.core.records.fromarrays( + [ + np.zeros(data.shape[0]), + np.zeros(data.shape[0]), + np.zeros(data.shape[0]), + np.zeros(data.shape[0]), + data["particleid"], + data["k"], + np.zeros(data.shape[0]), + np.zeros(data.shape[0]), # todo izone? + np.zeros(data.shape[0]), # todo istatus? + np.zeros(data.shape[0]), # todo ireason? + np.zeros(data.shape[0]), # todo trelease? + data["time"], + data["x"], + data["y"], + data["z"], + ], + dtype=PrtPathlineFile.dtypes["base"], + ) + else: + raise ValueError( + "Unrecognized dtype, expected:\n" + f"{pformat(MpPathlineFile.dtypes[7].names)} for MODPATH 7 pathlines, or;\n" + f"{pformat(MpPathlineFile.dtypes[6].names)} for MODPATH 6 pathlines, or;\n" + f"{pformat(MpPathlineFile.dtypes[5].names)} for MODPATH 5 pathlines, or;\n" + f"{pformat(MpPathlineFile.dtypes[3].names)} for MODPATH 3 pathlines, or;\n" + f"{pformat(MpEndpointFile.dtypes[7].names)} for MODPATH 7 endpoints, or;\n" + f"{pformat(MpEndpointFile.dtypes[6].names)} for MODPATH 6 endpoints, or;\n" + f"{pformat(MpEndpointFile.dtypes[5].names)} for MODPATH 5 endpoints, or;\n" + f"{pformat(MpEndpointFile.dtypes[3].names)} for MODPATH 3 endpoints, or;\n" + f"{pformat(PrtPathlineFile.dtypes['base'].names)} for MODFLOW 6 PRT, or;\n" + f"{pformat(ParticleTrackFile.dtype.names)} (minimal expected dtype names)" ) - # return early if empty - if data.empty: - ret = np.recarray((0,), dtype=PRT_PATHLINE_DTYPE) - return pd.DataFrame(ret) if ret_type == pd.DataFrame else ret - - # convert to recarray - data = data.to_records(index=False) - - # build prt format recarray - ret = np.core.records.fromarrays( - [ - data["stressperiod"], - data["timestep"], - np.zeros(data.shape[0]), - data["particlegroup"], - data["sequencenumber"], - data["k"], - data["node"], - np.zeros(data.shape[0]), # todo izone? - np.zeros(data.shape[0]), # todo istatus? - np.zeros(data.shape[0]), # todo ireason? - np.zeros(data.shape[0]), # todo trelease? - data["time"], - data["x"], - data["y"], - data["z"], - np.zeros(data.shape[0], str), - ], - dtype=PRT_PATHLINE_DTYPE, - ) - - if ret_type == pd.DataFrame: - df = pd.DataFrame(ret) - df.name = df.name.astype(pd.StringDtype()) - return df + if rt == pd.DataFrame: + data = pd.DataFrame(data) + if "name" in data.dtypes.keys(): + data.name = data.name.astype(pd.StringDtype()) + return data else: - return ret + return data diff --git a/flopy/utils/modpathfile.py b/flopy/utils/modpathfile.py index 813160c5de..4fe026fb59 100644 --- a/flopy/utils/modpathfile.py +++ b/flopy/utils/modpathfile.py @@ -30,6 +30,51 @@ def __init__( self.direction, ) = self.parse(filename, self.output_type) + def intersect( + self, cells, to_recarray + ) -> Union[List[np.recarray], np.recarray]: + if self.version < 7: + try: + raslice = self._data[["k", "i", "j"]] + except (KeyError, ValueError): + raise KeyError( + "could not extract 'k', 'i', and 'j' keys " + "from {} data".format(self.output_type.lower()) + ) + else: + try: + raslice = self._data[["node"]] + except (KeyError, ValueError): + msg = "could not extract 'node' key from {} data".format( + self.output_type.lower() + ) + raise KeyError(msg) + if isinstance(cells, (list, tuple)): + allint = all(isinstance(el, int) for el in cells) + # convert to a list of tuples + if allint: + t = [] + for el in cells: + t.append((el,)) + cells = t + + cells = np.array(cells, dtype=raslice.dtype) + inds = np.in1d(raslice, cells) + epdest = self._data[inds].copy().view(np.recarray) + + if to_recarray: + # use particle ids to get the rest of the paths + inds = np.in1d(self._data["particleid"], epdest.particleid) + series = self._data[inds].copy() + series.sort(order=["particleid", "time"]) + series = series.view(np.recarray) + else: + # collect unique particleids in selection + partids = np.unique(epdest["particleid"]) + series = [self.get_data(partid) for partid in partids] + + return series + @staticmethod def parse( file_path: Union[str, os.PathLike], file_type: str @@ -95,51 +140,6 @@ def parse( return modpath, compact, skiprows, version, direction - def intersect( - self, cells, to_recarray - ) -> Union[List[np.recarray], np.recarray]: - if self.version < 7: - try: - raslice = self._data[["k", "i", "j"]] - except (KeyError, ValueError): - raise KeyError( - "could not extract 'k', 'i', and 'j' keys " - "from {} data".format(self.output_type.lower()) - ) - else: - try: - raslice = self._data[["node"]] - except (KeyError, ValueError): - msg = "could not extract 'node' key from {} data".format( - self.output_type.lower() - ) - raise KeyError(msg) - if isinstance(cells, (list, tuple)): - allint = all(isinstance(el, int) for el in cells) - # convert to a list of tuples - if allint: - t = [] - for el in cells: - t.append((el,)) - cells = t - - cells = np.array(cells, dtype=raslice.dtype) - inds = np.in1d(raslice, cells) - epdest = self._data[inds].copy().view(np.recarray) - - if to_recarray: - # use particle ids to get the rest of the paths - inds = np.in1d(self._data["particleid"], epdest.particleid) - series = self._data[inds].copy() - series.sort(order=["particleid", "time"]) - series = series.view(np.recarray) - else: - # collect unique particleids in selection - partids = np.unique(epdest["particleid"]) - series = [self.get_data(partid) for partid in partids] - - return series - class PathlineFile(ModpathFile): """ diff --git a/flopy/utils/particletrackfile.py b/flopy/utils/particletrackfile.py index 5029c5860d..bfd4811783 100644 --- a/flopy/utils/particletrackfile.py +++ b/flopy/utils/particletrackfile.py @@ -40,11 +40,15 @@ class ParticleTrackFile(ABC): """ + dtype = MIN_PARTICLE_TRACK_DTYPE + """ + Minimal data shared by all particle track file formats. + """ + + # legacy, todo: deprecate outdtype = MIN_PARTICLE_TRACK_DTYPE """ - Minimal information shared by all particle track file formats. - Track data are converted to this dtype for internal storage - and for return from (sub-)class methods. + Minimal data shared by all particle track file formats. """ def __init__( @@ -173,10 +177,8 @@ def get_destination_data( Returns ------- - data : np.recarray - Slice of data array (e.g. PathlineFile._data, TimeseriesFile._data) - containing endpoint, pathline, or timeseries data that intersect - (k,i,j) or (node) dest_cells. + np.recarray or list of np.recarray + Pathline components intersecting (k,i,j) or (node) dest_cells. """ diff --git a/flopy/utils/prtfile.py b/flopy/utils/prtfile.py new file mode 100644 index 0000000000..2fdabcdc19 --- /dev/null +++ b/flopy/utils/prtfile.py @@ -0,0 +1,166 @@ +""" +Support for MODFLOW 6 PRT output files. +""" + +import os +from pathlib import Path +from typing import Dict, List, Optional, Union + +import numpy as np +import pandas as pd + +from flopy.utils.particletrackfile import ParticleTrackFile + + +class PathlineFile(ParticleTrackFile): + """Provides MODFLOW 6 prt output file support.""" + + key_cols = ["imdl", "iprp", "irpt", "trelease"] + """Columns making up each particle's composite key.""" + + dtypes = { + "base": np.dtype( + [ + ("kper", np.int32), + ("kstp", np.int32), + ("imdl", np.int32), + ("iprp", np.int32), + ("irpt", np.int32), + ("ilay", np.int32), + ("icell", np.int32), + ("izone", np.int32), + ("istatus", np.int32), + ("ireason", np.int32), + ("trelease", np.float32), + ("t", np.float32), + ("x", np.float32), + ("y", np.float32), + ("z", np.float32), + ("name", np.str_), + ] + ), + "full": np.dtype( + [ + ("kper", np.int32), + ("kstp", np.int32), + ("imdl", np.int32), + ("iprp", np.int32), + ("irpt", np.int32), + ("ilay", np.int32), + ("k", np.int32), # conform to canonical dtype + ("icell", np.int32), + ("izone", np.int32), + ("idest", np.int32), # destination zone, for convenience + ("dest", np.str_), # destination name, for convenience + ("istatus", np.int32), + ("ireason", np.int32), + ("trelease", np.int32), + ("t", np.int32), + ("t0", np.int32), # release time, for convenience + ("tt", np.int32), # termination time, convenience + ("time", np.int32), # conform to canonical dtype + ("x", np.int32), + ("y", np.int32), + ("z", np.int32), + ("particleid", np.int32), # conform to canonical dtype + ("name", np.str_), + ] + ), + } + + def __init__( + self, + filename: Union[str, os.PathLike], + header_filename: Optional[Union[str, os.PathLike]] = None, + destination_map: Optional[Dict[int, str]] = None, + verbose: bool = False, + ): + super().__init__(filename, verbose) + self.dtype, self._data = self._load(header_filename, destination_map) + + def _load( + self, + header_filename=None, + destination_map=None, + ) -> np.ndarray: + # load dtype from header file if present, otherwise use default dtype + hdr_fname = ( + None + if header_filename is None + else Path(header_filename).expanduser().absolute() + ) + if hdr_fname is not None and hdr_fname.is_file(): + lines = open(hdr_fname).readlines() + dtype = np.dtype( + { + "names": lines[0].strip().split(","), + "formats": lines[1].strip().split(","), + } + ) + else: + dtype = self.dtypes["base"] + + # read as binary or csv + try: + data = pd.read_csv(self.fname, dtype=dtype) + except UnicodeDecodeError: + try: + data = np.fromfile(self.fname, dtype=dtype) + except: + raise ValueError("Unreadable file, expected binary or CSV") + + # add particle id column + data = data.sort_values(self.key_cols) + data["particleid"] = data.groupby(self.key_cols).ngroup() + + # add release time and termination time columns + data["t0"] = ( + data.groupby("particleid") + .apply(lambda x: x.t.min()) + .to_frame(name="t0") + .t0 + ) + data["tt"] = ( + data.groupby("particleid") + .apply(lambda x: x.t.max()) + .to_frame(name="tt") + .tt + ) + + # assign destinations if zone map is provided + if destination_map is not None: + data["idest"] = data[data.istatus > 1].izone + data["dest"] = data.apply( + lambda row: destination_map[row.idest], + axis=1, + ) + + return dtype, data + + def intersect( + self, cells, to_recarray + ) -> Union[List[np.recarray], np.recarray]: + if isinstance(cells, (list, tuple)): + allint = all(isinstance(el, int) for el in cells) + if allint: + t = [] + for el in cells: + t.append((el,)) + cells = t + + icell = self._data[["icell"]] + cells = np.array(cells, dtype=icell.dtype) + inds = np.in1d(icell, cells) + epdest = self._data[inds].copy().view(np.recarray) + if to_recarray: + # use particle ids to get the rest of the paths + inds = np.in1d(self._data["particleid"], epdest.particleid) + series = self._data[inds].copy() + series.sort(order=["particleid", "time"]) + series = series.view(np.recarray) + else: + # collect unique particleids in selection + partids = np.unique(epdest["particleid"]) + series = [self.get_data(partid) for partid in partids] + + return series