From 176270b3e4035f84a1d6608ac038914f1eb66771 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 29 Sep 2023 14:07:42 -0700 Subject: [PATCH 01/65] wip --- morpheus/pipeline/stage_schema.py | 1 + 1 file changed, 1 insertion(+) create mode 100644 morpheus/pipeline/stage_schema.py diff --git a/morpheus/pipeline/stage_schema.py b/morpheus/pipeline/stage_schema.py new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/morpheus/pipeline/stage_schema.py @@ -0,0 +1 @@ + From 3593eb9cd8ff320519c0876a2588ce749d1a04e3 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 29 Sep 2023 14:21:31 -0700 Subject: [PATCH 02/65] wip --- morpheus/pipeline/stage_schema.py | 54 +++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/morpheus/pipeline/stage_schema.py b/morpheus/pipeline/stage_schema.py index 8b13789179..ee863ded6c 100644 --- a/morpheus/pipeline/stage_schema.py +++ b/morpheus/pipeline/stage_schema.py @@ -1 +1,55 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import typing + +if typing.TYPE_CHECKING: + from .stream_wrapper import StreamWrapper + + +class StagePortSchema: + + def __init__(self, port_type: type = None) -> None: + self._type = port_type + self._completed = False + + @property + def type(self) -> type: + return self._type + + @type.setter + def type(self, value: type): + assert not self._completed, "Attempted to set type on completed StagePortSchema" + + self._type = value + + def complete(self): + assert self.type is not None, "Attempted to complete StagePortSchema without setting type" + self._completed = True + + +class StageSchema: + + def __init__(self, stage: "StreamWrapper") -> None: + self._stage = stage + + self._input_ports = [p._schema for p in stage.input_ports] + self._output_ports = [StagePortSchema() for p in range(len(stage.output_ports))] + + def complete(self): + + for port in self._output_ports: + # This locks the port schema + port.complete() \ No newline at end of file From e788c81c1c4daa47535b81b6ee893aea1cdb258c Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 29 Sep 2023 14:53:10 -0700 Subject: [PATCH 03/65] First pass at PortSchema and StageSchema classes --- morpheus/pipeline/stage_schema.py | 41 ++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/morpheus/pipeline/stage_schema.py b/morpheus/pipeline/stage_schema.py index ee863ded6c..5785b4d44d 100644 --- a/morpheus/pipeline/stage_schema.py +++ b/morpheus/pipeline/stage_schema.py @@ -19,37 +19,48 @@ from .stream_wrapper import StreamWrapper -class StagePortSchema: +class PortSchema: - def __init__(self, port_type: type = None) -> None: + def __init__(self, port_type: type = None): self._type = port_type self._completed = False - @property - def type(self) -> type: + def get_type(self) -> type: return self._type - @type.setter - def type(self, value: type): - assert not 
self._completed, "Attempted to set type on completed StagePortSchema" - + def set_type(self, value: type): + assert not self._completed, "Attempted to set type on completed PortSchema" self._type = value def complete(self): - assert self.type is not None, "Attempted to complete StagePortSchema without setting type" + assert self._type is not None, "Attempted to complete PortSchema without setting type" self._completed = True + def is_completed(self) -> bool: + return self._completed + class StageSchema: - def __init__(self, stage: "StreamWrapper") -> None: + def __init__(self, stage: "StreamWrapper"): self._stage = stage - self._input_ports = [p._schema for p in stage.input_ports] - self._output_ports = [StagePortSchema() for p in range(len(stage.output_ports))] + self._input_schemas = [] + for port in stage.input_ports: + assert port._schema.is_completed(), "Attempted to create StageSchema with incomplete input port schemas" + self._input_schemas.append(port._schema) - def complete(self): + self._output_schemas = [PortSchema() for _ in range(len(stage.output_ports))] - for port in self._output_ports: + @property + def input_schemas(self) -> list[PortSchema]: + return self._input_schemas + + @property + def output_schemas(self) -> list[PortSchema]: + return self._output_schemas + + def complete(self): + for port_schema in self.output_schemas: # This locks the port schema - port.complete() \ No newline at end of file + port_schema.complete() From b7d18487ffa5b7bc9375546690ed8357ccc202fc Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 29 Sep 2023 14:58:56 -0700 Subject: [PATCH 04/65] Single port helpers --- morpheus/pipeline/stage_schema.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/morpheus/pipeline/stage_schema.py b/morpheus/pipeline/stage_schema.py index 5785b4d44d..1bcaa17209 100644 --- a/morpheus/pipeline/stage_schema.py +++ b/morpheus/pipeline/stage_schema.py @@ -43,7 +43,7 @@ def is_completed(self) -> bool: class StageSchema: def __init__(self, stage: "StreamWrapper"): - self._stage = stage + self._stage = stage # TODO: Determine if we need to hold a reference to the stage self._input_schemas = [] for port in stage.input_ports: @@ -56,10 +56,28 @@ def __init__(self, stage: "StreamWrapper"): def input_schemas(self) -> list[PortSchema]: return self._input_schemas + @property + def input_schema(self) -> PortSchema: + """ + Single port variant of input_schemas. Will fail if there are multiple input ports. + """ + assert len(self._input_schemas) == 1, \ + "Attempted to access input_schema property on StageSchema with multiple inputs" + return self._input_schemas[0] + @property def output_schemas(self) -> list[PortSchema]: return self._output_schemas + @property + def output_schema(self) -> PortSchema: + """ + Single port variant of output_schemas. Will fail if there are multiple output ports. 
+ """ + assert len(self._output_schemas) == 1, \ + "Attempted to access output_schema property on StageSchema with multiple outputs" + return self._output_schemas[0] + def complete(self): for port_schema in self.output_schemas: # This locks the port schema From de86b9cf2e815504aefe21fbc35407bc72acc266 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 29 Sep 2023 15:34:25 -0700 Subject: [PATCH 05/65] wip --- morpheus/pipeline/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/morpheus/pipeline/__init__.py b/morpheus/pipeline/__init__.py index bf72674728..ed161f7049 100644 --- a/morpheus/pipeline/__init__.py +++ b/morpheus/pipeline/__init__.py @@ -20,6 +20,7 @@ from morpheus.pipeline.sender import Sender from morpheus.pipeline.receiver import Receiver +from morpheus.pipeline.stage_schema import StageSchema from morpheus.pipeline.stream_wrapper import StreamWrapper from morpheus.pipeline.stage import Stage from morpheus.pipeline.single_port_stage import SinglePortStage From e5a10023d7968592c877e08fabc19a3b901b0640 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 29 Sep 2023 15:49:59 -0700 Subject: [PATCH 06/65] WIP --- morpheus/pipeline/stage_schema.py | 7 +++++-- morpheus/pipeline/stream_wrapper.py | 14 ++++++++------ 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/morpheus/pipeline/stage_schema.py b/morpheus/pipeline/stage_schema.py index 1bcaa17209..e049bbd54d 100644 --- a/morpheus/pipeline/stage_schema.py +++ b/morpheus/pipeline/stage_schema.py @@ -33,6 +33,7 @@ def set_type(self, value: type): self._type = value def complete(self): + assert not self._completed, "Attempted to PortSchema.complete() twice" assert self._type is not None, "Attempted to complete PortSchema without setting type" self._completed = True @@ -43,8 +44,6 @@ def is_completed(self) -> bool: class StageSchema: def __init__(self, stage: "StreamWrapper"): - self._stage = stage # TODO: Determine if we need to hold a reference to the stage - self._input_schemas = [] for port in stage.input_ports: assert port._schema.is_completed(), "Attempted to create StageSchema with incomplete input port schemas" @@ -79,6 +78,10 @@ def output_schema(self) -> PortSchema: return self._output_schemas[0] def complete(self): + """ + Calls complete on all output port schemas. + This will trigger an assertion error if any of the output port schemas do not have a type set. + """ for port_schema in self.output_schemas: # This locks the port schema port_schema.complete() diff --git a/morpheus/pipeline/stream_wrapper.py b/morpheus/pipeline/stream_wrapper.py index 8dad056d32..7b5bca8b63 100644 --- a/morpheus/pipeline/stream_wrapper.py +++ b/morpheus/pipeline/stream_wrapper.py @@ -339,15 +339,17 @@ def can_build(self, check_ports=False) -> bool: def _pre_build(self, do_propagate: bool = True): assert not self.is_built, "build called prior to _pre_build" assert not self.is_pre_built, "Can only pre-build stages once!" 
- in_types: list[type] = [x.get_input_type() for x in self.input_ports] - out_types: list[type] = self.output_types(in_types) + schema = _pipeline.StageSchema(self) + self.output_types(schema) - assert len(out_types) == len(self.output_ports), \ + assert len(schema.output_schemas) == len(self.output_ports), \ (f"Prebuild expected `output_types()` to return {len(self.output_ports)} types (one for each output port), " - f"but got {len(out_types)}.") + f"but got {len(schema.output_schemas)}.") - for (port_idx, out_type) in enumerate(out_types): - self.output_ports[port_idx]._out_type = out_type + schema.complete() + + for (port_idx, port_schema) in enumerate(schema.output_schemas): + self.output_ports[port_idx]._schema = port_schema self._is_pre_built = True From 2978f1fc9403e7026f1613a99c76ebff9282bdfd Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 2 Oct 2023 08:17:39 -0700 Subject: [PATCH 07/65] docstrings --- morpheus/pipeline/stage_schema.py | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/morpheus/pipeline/stage_schema.py b/morpheus/pipeline/stage_schema.py index e049bbd54d..6bac038a6e 100644 --- a/morpheus/pipeline/stage_schema.py +++ b/morpheus/pipeline/stage_schema.py @@ -37,7 +37,7 @@ def complete(self): assert self._type is not None, "Attempted to complete PortSchema without setting type" self._completed = True - def is_completed(self) -> bool: + def is_complete(self) -> bool: return self._completed @@ -46,15 +46,27 @@ class StageSchema: def __init__(self, stage: "StreamWrapper"): self._input_schemas = [] for port in stage.input_ports: - assert port._schema.is_completed(), "Attempted to create StageSchema with incomplete input port schemas" - self._input_schemas.append(port._schema) + assert port.input_schema.is_complete(), "Attempted to create StageSchema with incomplete input port schemas" + self._input_schemas.append(port.input_schema) self._output_schemas = [PortSchema() for _ in range(len(stage.output_ports))] @property def input_schemas(self) -> list[PortSchema]: + """ + Return all input schemas, one for each input port. + """ return self._input_schemas + @property + def input_types(self) -> list[type]: + """ + Return the type associated with each input port. + + Convenience function for calling `port_schema.get_type()` for each element in `input_schemas`. + """ + return [port_schema.get_type() for port_schema in self._input_schemas] + @property def input_schema(self) -> PortSchema: """ @@ -64,8 +76,18 @@ def input_schema(self) -> PortSchema: "Attempted to access input_schema property on StageSchema with multiple inputs" return self._input_schemas[0] + @property + def input_type(self) -> type: + """ + Single port variant of input_types. Will fail if there are multiple input ports. + """ + return self.input_schema.get_type() + @property def output_schemas(self) -> list[PortSchema]: + """ + Return all output schemas, one for each output port. 
+ """ return self._output_schemas @property From 16d3f9220b256e9437d5f5ac021ae88aeeb723ed Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 2 Oct 2023 10:13:35 -0700 Subject: [PATCH 08/65] WIP --- morpheus/pipeline/__init__.py | 3 +- morpheus/pipeline/pipeline.py | 14 +++--- morpheus/pipeline/receiver.py | 61 ++++++++++++++------------ morpheus/pipeline/sender.py | 18 +++++--- morpheus/pipeline/single_port_stage.py | 4 +- morpheus/pipeline/source_stage.py | 2 +- morpheus/pipeline/stream_wrapper.py | 4 +- 7 files changed, 58 insertions(+), 48 deletions(-) diff --git a/morpheus/pipeline/__init__.py b/morpheus/pipeline/__init__.py index ed161f7049..44e5d2fc77 100644 --- a/morpheus/pipeline/__init__.py +++ b/morpheus/pipeline/__init__.py @@ -18,9 +18,10 @@ # These must be imported in a specific order # isort: off +from morpheus.pipeline.stage_schema import PortSchema +from morpheus.pipeline.stage_schema import StageSchema from morpheus.pipeline.sender import Sender from morpheus.pipeline.receiver import Receiver -from morpheus.pipeline.stage_schema import StageSchema from morpheus.pipeline.stream_wrapper import StreamWrapper from morpheus.pipeline.stage import Stage from morpheus.pipeline.single_port_stage import SinglePortStage diff --git a/morpheus/pipeline/pipeline.py b/morpheus/pipeline/pipeline.py index b00ff64210..23ed3cc8eb 100644 --- a/morpheus/pipeline/pipeline.py +++ b/morpheus/pipeline/pipeline.py @@ -256,7 +256,7 @@ def _pre_build(self): # for s in source_and_stages: for stage in segment_graph.nodes(): for port in typing.cast(StreamWrapper, stage).input_ports: - port.link_type() + port.link_schema() logger.info("====Pre-Building Segment Complete!====") @@ -535,19 +535,19 @@ def has_ports(node: StreamWrapper, is_input): # Check for situation #1 if (len(in_port._input_senders) == 1 and len(out_port._output_receivers) == 1 - and (in_port.in_type == out_port.out_type)): + and (in_port.input_schema == out_port.output_schema)): - edge_attrs["label"] = pretty_print_type_name(in_port.in_type) + edge_attrs["label"] = pretty_print_type_name(in_port.input_schema) else: rec_idx = out_port._output_receivers.index(in_port) sen_idx = in_port._input_senders.index(out_port) # Add type labels if available - if (rec_idx == 0 and out_port.out_type is not None): - edge_attrs["taillabel"] = pretty_print_type_name(out_port.out_type) + if (rec_idx == 0 and out_port.output_schema is not None): + edge_attrs["taillabel"] = pretty_print_type_name(out_port.output_schema) - if (sen_idx == 0 and in_port.in_type is not None): - edge_attrs["headlabel"] = pretty_print_type_name(in_port.in_type) + if (sen_idx == 0 and in_port.input_schema is not None): + edge_attrs["headlabel"] = pretty_print_type_name(in_port.input_schema) gv_subgraph.edge(start_name, end_name, **edge_attrs) diff --git a/morpheus/pipeline/receiver.py b/morpheus/pipeline/receiver.py index 9da67ecc2a..cbb1e8e71d 100644 --- a/morpheus/pipeline/receiver.py +++ b/morpheus/pipeline/receiver.py @@ -41,10 +41,10 @@ def __init__(self, parent: "_pipeline.StreamWrapper", port_number: int): self._parent = parent self.port_number = port_number - self._is_type_linked = False + self._is_schema_linked = False self._is_node_linked = False - self._input_type: type = None + self._input_schema: _pipeline.PortSchema = None self._input_node: mrc.SegmentObject = None self._input_senders: typing.List[_pipeline.Sender] = [] @@ -70,8 +70,8 @@ def is_partial(self): return any(x.is_complete for x in self._input_senders) @property - def in_type(self): - return 
self._input_type
+    def input_schema(self) -> _pipeline.PortSchema:
+        return self._input_schema

     def get_input_node(self, builder: mrc.Builder) -> mrc.SegmentObject:
         """
@@ -87,8 +87,8 @@ def get_input_node(self, builder: mrc.Builder) -> mrc.SegmentObject:
             # In this case, our input stream/type is determined from the sole Sender
             sender = self._input_senders[0]

-            if sender.out_node is not None:
-                self._input_node = sender.out_node
+            if sender.output_node is not None:
+                self._input_node = sender.output_node
                 self._is_node_linked = True
         else:
             # We have multiple senders. Create a dummy stream to connect all senders
@@ -98,43 +98,47 @@ def get_input_node(self, builder: mrc.Builder) -> mrc.SegmentObject:
             if (self.is_complete):
                 # Connect all streams now
                 for input_sender in self._input_senders:
-                    builder.make_edge(input_sender.out_node, self._input_node)
+                    builder.make_edge(input_sender.output_node, self._input_node)

                 self._is_node_linked = True

         return self._input_node

-    def get_input_type(self) -> type:
-        """
-        Returns the the parent node's output type.
-        """
-
+    def get_input_schema(self) -> _pipeline.PortSchema:
         assert self.is_partial, "Must be partially complete to get the input type!"

         # Build the input from the senders
-        if (self._input_type is None):
+        if (self._input_schema is None):
             # First check if we only have 1 input sender
             if (len(self._input_senders) == 1):
                 # In this case, our input stream/type is determined from the sole Sender
                 sender = self._input_senders[0]
-                self._input_type = sender.out_type
-                self._is_type_linked = True
-                if sender.out_node is not None:
-                    self._input_node = sender.out_node
+                self._input_schema = sender.output_schema
+                self._is_schema_linked = True
+                if sender.output_node is not None:
+                    self._input_node = sender.output_node
                     self._is_node_linked = True
             else:
                 # Now determine the output type from what we have
-                great_ancestor = greatest_ancestor(*[x.out_type for x in self._input_senders if x.is_complete])
+                great_ancestor = greatest_ancestor(
+                    *[x.output_schema.get_type() for x in self._input_senders if x.is_complete])

                 if (great_ancestor is None):
                     raise RuntimeError((f"Cannot determine single type for senders of input port for {self._parent}. "
                                         "Use a merge stage to handle different types of inputs."))

-                self._input_type = great_ancestor
+                self._input_schema = _pipeline.PortSchema(port_type=great_ancestor)

         return self._input_schema
+
+    def get_input_type(self) -> type:
+        """
+        Returns the upstream node's output type, and in case of multiple upstreams this will return the common
+        ancestor type.
+        """
+        return self.get_input_schema().get_type()

-    def link_type(self):
+    def link_schema(self):
         """
         The type linking phase determines the final type of the `Receiver`.

         assert self.is_complete, "Must be complete before linking!"
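Both `get_input_schema()` above and the re-check just below lean on `greatest_ancestor()` to collapse all completed sender types into a single schema type. A small illustration of the intended behavior, assuming `greatest_ancestor` lives at `morpheus.utils.type_utils` and resolves the closest shared base class (illustrative sketch, not part of the patch):

```
from morpheus.utils.type_utils import greatest_ancestor


class Base:
    pass


class ChildA(Base):
    pass


class ChildB(Base):
    pass


# Two completed senders typed ChildA and ChildB merge into PortSchema(port_type=Base).
# When no usable common ancestor exists, greatest_ancestor() returns None and the
# RuntimeError above fires, pointing the user at a merge stage instead.
assert greatest_ancestor(ChildA, ChildB) is Base
```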
- if (self._is_type_linked): + if (self._is_schema_linked): return # Check that the types still work - great_ancestor = greatest_ancestor(*[x.out_type for x in self._input_senders if x.is_complete]) + great_ancestor = greatest_ancestor(*[x.output_schema.get_type() for x in self._input_senders if x.is_complete]) - if (not typing_utils.issubtype(great_ancestor, self._input_type)): - raise RuntimeError(f"Input port type {great_ancestor} does not match {self._input_type} for {self._parent}") + if (not typing_utils.issubtype(great_ancestor, self._input_schema)): + raise RuntimeError( + f"Input port type {great_ancestor} does not match {self._input_schema} for {self._parent}") - self._is_type_linked = True + self._is_schema_linked = True def link_node(self, builder: mrc.Builder): """ @@ -167,7 +172,7 @@ def link_node(self, builder: mrc.Builder): return for sender in self._input_senders: - assert sender.out_node is not self._input_node - builder.make_edge(sender.out_node, self._input_node) + assert sender.output_node is not self._input_node + builder.make_edge(sender.output_node, self._input_node) self._is_node_linked = True diff --git a/morpheus/pipeline/sender.py b/morpheus/pipeline/sender.py index 3398fb500c..78776e6e01 100644 --- a/morpheus/pipeline/sender.py +++ b/morpheus/pipeline/sender.py @@ -41,8 +41,8 @@ def __init__(self, parent: "_pipeline.StreamWrapper", port_number: int): self._output_receivers: typing.List[_pipeline.Receiver] = [] - self._out_type: type = None - self._out_node: mrc.SegmentObject = None + self._output_schema: _pipeline.PortSchema = None + self._output_node: mrc.SegmentObject = None @property def parent(self): @@ -51,12 +51,16 @@ def parent(self): @property def is_complete(self): # Sender is complete when the type has been set - return self._out_type is not None + return self._output_schema is not None @property - def out_type(self): - return self._out_type + def output_schema(self): + return self._output_schema + + @output_schema.setter + def output_schema(self, value: _pipeline.PortSchema): + self._output_schema = value @property - def out_node(self): - return self._out_node + def output_node(self): + return self._output_node diff --git a/morpheus/pipeline/single_port_stage.py b/morpheus/pipeline/single_port_stage.py index b58fc81d68..112ffffbf1 100644 --- a/morpheus/pipeline/single_port_stage.py +++ b/morpheus/pipeline/single_port_stage.py @@ -115,7 +115,7 @@ def _post_build(self, builder: mrc.Builder, out_ports_nodes: list[mrc.SegmentObj # pylint: disable=logging-format-interpolation logger.info("Added stage: %s\n └─ %s -> %s", str(self), - pretty_print_type_name(self.input_ports[0].in_type), - pretty_print_type_name(self.output_ports[0].out_type)) + pretty_print_type_name(self.input_ports[0].input_schema), + pretty_print_type_name(self.output_ports[0].output_schema)) return [ret_val] diff --git a/morpheus/pipeline/source_stage.py b/morpheus/pipeline/source_stage.py index 45a6c6b781..abd2b874f1 100644 --- a/morpheus/pipeline/source_stage.py +++ b/morpheus/pipeline/source_stage.py @@ -85,7 +85,7 @@ def _build(self, builder: mrc.Builder, input_nodes: list[mrc.SegmentObject]) -> assert len(sources) == len(self.output_ports), "Number of sources should match number of output ports" for (i, source) in enumerate(sources): - self._output_ports[i]._out_node = source + self._output_ports[i]._output_node = source self._sources.append(source) return sources diff --git a/morpheus/pipeline/stream_wrapper.py b/morpheus/pipeline/stream_wrapper.py index 7b5bca8b63..528b347d0d 
100644 --- a/morpheus/pipeline/stream_wrapper.py +++ b/morpheus/pipeline/stream_wrapper.py @@ -349,7 +349,7 @@ def _pre_build(self, do_propagate: bool = True): schema.complete() for (port_idx, port_schema) in enumerate(schema.output_schemas): - self.output_ports[port_idx]._schema = port_schema + self.output_ports[port_idx].output_schema = port_schema self._is_pre_built = True @@ -390,7 +390,7 @@ def build(self, builder: mrc.Builder, do_propagate: bool = True): # Assign the output ports for port_idx, out_node in enumerate(out_ports_nodes): - self.output_ports[port_idx]._out_node = out_node + self.output_ports[port_idx]._output_node = out_node self._is_built = True From 679e534b81a558adf8f40f960c43d7b7c4d067bd Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 2 Oct 2023 10:40:18 -0700 Subject: [PATCH 09/65] consolidate input schema logic --- morpheus/pipeline/receiver.py | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/morpheus/pipeline/receiver.py b/morpheus/pipeline/receiver.py index cbb1e8e71d..5a70bcc01b 100644 --- a/morpheus/pipeline/receiver.py +++ b/morpheus/pipeline/receiver.py @@ -104,6 +104,16 @@ def get_input_node(self, builder: mrc.Builder) -> mrc.SegmentObject: return self._input_node + def _compute_input_schema(self): + great_ancestor = greatest_ancestor(*[x.output_schema.get_type() for x in self._input_senders if x.is_complete]) + + if (great_ancestor is None): + raise RuntimeError((f"Cannot determine single type for senders of input port for {self._parent}. " + "Use a merge stage to handle different types of inputs.")) + + self._input_schema = _pipeline.PortSchema(port_type=great_ancestor) + self._is_schema_linked = True + def get_input_schema(self) -> _pipeline.PortSchema: assert self.is_partial, "Must be partially complete to get the input type!" @@ -120,14 +130,7 @@ def get_input_schema(self) -> _pipeline.PortSchema: self._is_node_linked = True else: # Now determine the output type from what we have - great_ancestor = greatest_ancestor( - *[x.output_schema.get_type() for x in self._input_senders if x.is_complete]) - - if (great_ancestor is None): - raise RuntimeError((f"Cannot determine single type for senders of input port for {self._parent}. 
" - "Use a merge stage to handle different types of inputs.")) - - self._input_schema = _pipeline.PortSchema(port_type=great_ancestor) + self._compute_input_schema() return self._input_schema @@ -152,14 +155,7 @@ def link_schema(self): if (self._is_schema_linked): return - # Check that the types still work - great_ancestor = greatest_ancestor(*[x.output_schema.get_type() for x in self._input_senders if x.is_complete]) - - if (not typing_utils.issubtype(great_ancestor, self._input_schema)): - raise RuntimeError( - f"Input port type {great_ancestor} does not match {self._input_schema} for {self._parent}") - - self._is_schema_linked = True + self._compute_input_schema() def link_node(self, builder: mrc.Builder): """ From 0cdb07759c0a87ddfc6442211f98e29c2f371c8e Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 2 Oct 2023 11:13:34 -0700 Subject: [PATCH 10/65] WIP --- morpheus/pipeline/sender.py | 15 +++++++++++---- morpheus/pipeline/single_output_source.py | 21 ++++----------------- morpheus/pipeline/single_port_stage.py | 2 +- morpheus/pipeline/stage.py | 15 --------------- morpheus/pipeline/stream_wrapper.py | 14 +++++--------- 5 files changed, 21 insertions(+), 46 deletions(-) diff --git a/morpheus/pipeline/sender.py b/morpheus/pipeline/sender.py index 78776e6e01..ec74251b7f 100644 --- a/morpheus/pipeline/sender.py +++ b/morpheus/pipeline/sender.py @@ -45,16 +45,16 @@ def __init__(self, parent: "_pipeline.StreamWrapper", port_number: int): self._output_node: mrc.SegmentObject = None @property - def parent(self): + def parent(self) -> "_pipeline.StreamWrapper": return self._parent @property - def is_complete(self): + def is_complete(self) -> bool: # Sender is complete when the type has been set return self._output_schema is not None @property - def output_schema(self): + def output_schema(self) -> _pipeline.PortSchema: return self._output_schema @output_schema.setter @@ -62,5 +62,12 @@ def output_schema(self, value: _pipeline.PortSchema): self._output_schema = value @property - def output_node(self): + def output_type(self) -> type: + if self.is_complete: + return self._output_schema.get_type() + + return None + + @property + def output_node(self) -> mrc.SegmentObject: return self._output_node diff --git a/morpheus/pipeline/single_output_source.py b/morpheus/pipeline/single_output_source.py index cbd0e6b1fd..00e699a16d 100644 --- a/morpheus/pipeline/single_output_source.py +++ b/morpheus/pipeline/single_output_source.py @@ -66,30 +66,16 @@ def _build_sources(self, builder: mrc.Builder) -> list[mrc.SegmentObject]: @typing.final def _post_build(self, builder: mrc.Builder, out_ports_nodes: list[mrc.SegmentObject]) -> list[mrc.SegmentObject]: - + assert len(self.output_ports) == 1, "SingleOutputSource should have one output port" ret_val = self._post_build_single(builder, out_ports_nodes[0]) logger.info("Added source: %s\n └─> %s", self, pretty_print_type_name(self.output_type())) return [ret_val] - def output_types(self, parent_output_types: list[type]) -> list[type]: - """ - Return the output type for this stage. - - Returns - ------- - list - Output types. - - """ - assert len(parent_output_types) == 0, "Source stages should not have any parent stages." - return [self.output_type()] - - @abstractmethod def output_type(self) -> type: """ - Return the output type for this stage. Derived classes should override this method. + Return the output type for this stage. Returns ------- @@ -97,4 +83,5 @@ def output_type(self) -> type: Output type. 
""" - pass + assert len(self.output_ports) == 1, "SingleOutputSource should have one output port" + return self.output_ports[0].output_type diff --git a/morpheus/pipeline/single_port_stage.py b/morpheus/pipeline/single_port_stage.py index 112ffffbf1..4c15657820 100644 --- a/morpheus/pipeline/single_port_stage.py +++ b/morpheus/pipeline/single_port_stage.py @@ -56,7 +56,7 @@ def accepted_types(self) -> typing.Tuple: """ pass - def output_types(self, parent_output_types: list[type]) -> list[type]: + def compute_schema(self, upstream_schema: _pipeline.StageSchema) -> _pipeline.StageSchema: """ Return the output type for this stage. diff --git a/morpheus/pipeline/stage.py b/morpheus/pipeline/stage.py index 3d382578d4..b84b55ccbb 100644 --- a/morpheus/pipeline/stage.py +++ b/morpheus/pipeline/stage.py @@ -41,21 +41,6 @@ def _post_build(self, builder: mrc.Builder, out_ports_nodes: list[mrc.SegmentObj def _start(self): pass - @abstractmethod - def output_types(self, parent_output_types: list[type]) -> list[type]: - """ - Return the output types for this stage based on the incoming types of parent stages. Derived classes should - override this method, if the `parent_output_types` are incompatible then the stage should rase a - `RuntimeError` exception. - - Returns - ------- - list - Output types. - - """ - pass - async def start_async(self): """ This function is called along with on_start during stage initialization. Allows stages to utilize the diff --git a/morpheus/pipeline/stream_wrapper.py b/morpheus/pipeline/stream_wrapper.py index 528b347d0d..db92b6aca0 100644 --- a/morpheus/pipeline/stream_wrapper.py +++ b/morpheus/pipeline/stream_wrapper.py @@ -340,7 +340,7 @@ def _pre_build(self, do_propagate: bool = True): assert not self.is_built, "build called prior to _pre_build" assert not self.is_pre_built, "Can only pre-build stages once!" schema = _pipeline.StageSchema(self) - self.output_types(schema) + self.compute_schema(schema) assert len(schema.output_schemas) == len(self.output_ports), \ (f"Prebuild expected `output_types()` to return {len(self.output_ports)} types (one for each output port), " @@ -469,14 +469,10 @@ def get_needed_columns(self): return self._needed_columns.copy() @abstractmethod - def output_types(self, parent_output_types: list[type]) -> list[type]: + def compute_schema(self, upstream_schema: _pipeline.StageSchema) -> _pipeline.StageSchema: """ - Return the output types for this stage. Derived classes should override this method. - - Returns - ------- - list - Output types. - + Compute the output schema for this stage based on the incoming schema from upstream stages. + Derived classes should override this method. If the port types in `upstream_schema` are incompatible the stage + should raise a `RuntimeError`. 
""" pass From 3c56a8105fad726f533ffce7acde49640b77fc41 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 2 Oct 2023 11:53:15 -0700 Subject: [PATCH 11/65] WIP --- morpheus/pipeline/receiver.py | 3 +- morpheus/pipeline/single_output_source.py | 19 +++-------- morpheus/pipeline/single_port_stage.py | 40 +++++------------------ morpheus/pipeline/stream_wrapper.py | 27 ++++++++++++--- 4 files changed, 37 insertions(+), 52 deletions(-) diff --git a/morpheus/pipeline/receiver.py b/morpheus/pipeline/receiver.py index 5a70bcc01b..8e18574933 100644 --- a/morpheus/pipeline/receiver.py +++ b/morpheus/pipeline/receiver.py @@ -134,7 +134,8 @@ def get_input_schema(self) -> _pipeline.PortSchema: return self._input_schema - def get_input_type(self) -> type: + @property + def input_type(self) -> type: """ Returns the the upstream node's output type, and in case of multiple upstreams this will return the common ancestor type. diff --git a/morpheus/pipeline/single_output_source.py b/morpheus/pipeline/single_output_source.py index 00e699a16d..25a445f201 100644 --- a/morpheus/pipeline/single_output_source.py +++ b/morpheus/pipeline/single_output_source.py @@ -62,26 +62,15 @@ def _build_source(self, builder: mrc.Builder) -> mrc.SegmentObject: pass def _build_sources(self, builder: mrc.Builder) -> list[mrc.SegmentObject]: + assert len(self.output_ports) == 1, \ + f"SingleOutputSource should have only one output port, {self} has {len(self.output_ports)}" + return [self._build_source(builder)] @typing.final def _post_build(self, builder: mrc.Builder, out_ports_nodes: list[mrc.SegmentObject]) -> list[mrc.SegmentObject]: - assert len(self.output_ports) == 1, "SingleOutputSource should have one output port" ret_val = self._post_build_single(builder, out_ports_nodes[0]) - logger.info("Added source: %s\n └─> %s", self, pretty_print_type_name(self.output_type())) + logger.info("Added source: %s\n └─> %s", self, pretty_print_type_name(self.output_ports[0].output_type)) return [ret_val] - - def output_type(self) -> type: - """ - Return the output type for this stage. - - Returns - ------- - type - Output type. - - """ - assert len(self.output_ports) == 1, "SingleOutputSource should have one output port" - return self.output_ports[0].output_type diff --git a/morpheus/pipeline/single_port_stage.py b/morpheus/pipeline/single_port_stage.py index 4c15657820..6348d76282 100644 --- a/morpheus/pipeline/single_port_stage.py +++ b/morpheus/pipeline/single_port_stage.py @@ -56,39 +56,15 @@ def accepted_types(self) -> typing.Tuple: """ pass - def compute_schema(self, upstream_schema: _pipeline.StageSchema) -> _pipeline.StageSchema: - """ - Return the output type for this stage. - - Returns - ------- - list - Output types. - - """ + def _pre_compute_schema(self, schema: _pipeline.StageSchema): + # Pre-flight check to verify that the input type is one of the accepted types + super()._pre_compute_schema(schema) accepted_types = typing.Union[self.accepted_types()] - - assert len(parent_output_types) == 1, "SinglePortStage must have 1 input port and 1 output port" - parent_output_type = parent_output_types[0] - if (not typing_utils.issubtype(parent_output_type, accepted_types)): - raise RuntimeError((f"The {self.name} stage cannot handle input of {parent_output_type}. " + input_type = schema.input_type + if (not typing_utils.issubtype(input_type, accepted_types)): + raise RuntimeError((f"The {self.name} stage cannot handle input of {input_type}. 
" f"Accepted input types: {self.accepted_types()}")) - return [self.output_type(parent_output_type)] - - @abstractmethod - def output_type(self, parent_output_type: type) -> type: - """ - Return the output type for this stage. Derived classes should override this method. - - Returns - ------- - type - Output type. - - """ - pass - @abstractmethod def _build_single(self, builder: mrc.Builder, input_node: mrc.SegmentObject) -> mrc.SegmentObject: pass @@ -115,7 +91,7 @@ def _post_build(self, builder: mrc.Builder, out_ports_nodes: list[mrc.SegmentObj # pylint: disable=logging-format-interpolation logger.info("Added stage: %s\n └─ %s -> %s", str(self), - pretty_print_type_name(self.input_ports[0].input_schema), - pretty_print_type_name(self.output_ports[0].output_schema)) + pretty_print_type_name(self.input_ports[0].input_type), + pretty_print_type_name(self.output_ports[0].output_type)) return [ret_val] diff --git a/morpheus/pipeline/stream_wrapper.py b/morpheus/pipeline/stream_wrapper.py index db92b6aca0..764cfc505b 100644 --- a/morpheus/pipeline/stream_wrapper.py +++ b/morpheus/pipeline/stream_wrapper.py @@ -340,6 +340,7 @@ def _pre_build(self, do_propagate: bool = True): assert not self.is_built, "build called prior to _pre_build" assert not self.is_pre_built, "Can only pre-build stages once!" schema = _pipeline.StageSchema(self) + self._pre_compute_schema(schema) self.compute_schema(schema) assert len(schema.output_schemas) == len(self.output_ports), \ @@ -469,10 +470,28 @@ def get_needed_columns(self): return self._needed_columns.copy() @abstractmethod - def compute_schema(self, upstream_schema: _pipeline.StageSchema) -> _pipeline.StageSchema: + def compute_schema(self, schema: _pipeline.StageSchema): """ - Compute the output schema for this stage based on the incoming schema from upstream stages. - Derived classes should override this method. If the port types in `upstream_schema` are incompatible the stage - should raise a `RuntimeError`. + Compute the schema for this stage based on the incoming schema from upstream stages. + + Incoming schema and type information from upstream stages is available via the `schema.input_schemas` and + `schema.input_types` properties. + + Derived classes need to override this method, can set the output type(s) on `schema` by calling `set_type` for + all output ports. For example a simple pass-thru stage might perform the following: + + ``` + >>> for (port_idx, port_schema) in schema.input_schemas: + >>> schema.output_schemas[port_idx].set_type(port_schema.get_type()) + ``` + + If the port types in `upstream_schema` are incompatible the stage should raise a `RuntimeError`. + """ + pass + + def _pre_compute_schema(self, schema: _pipeline.StageSchema): + """ + Optional pre-flight method, allows base classes like `SinglePortStage` to perform pre-flight checks prior to + `compute_schema` being called. 
""" pass From aaa81875d9a3a2519a7dc85cf47c8008f43d9b28 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 2 Oct 2023 11:56:12 -0700 Subject: [PATCH 12/65] Update the pass-thru-mixin --- morpheus/pipeline/pass_thru_type_mixin.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/morpheus/pipeline/pass_thru_type_mixin.py b/morpheus/pipeline/pass_thru_type_mixin.py index 3fb6548fd8..fe47052239 100644 --- a/morpheus/pipeline/pass_thru_type_mixin.py +++ b/morpheus/pipeline/pass_thru_type_mixin.py @@ -14,6 +14,8 @@ """Mixin for single port stages which receive and emit the same type.""" from abc import ABC +from morpheus.pipeline.stage_schema import StageSchema + class PassThruTypeMixin(ABC): """ @@ -21,5 +23,6 @@ class PassThruTypeMixin(ABC): `typing.Any`, and who's output type is inferred from the output types of the parent stages. """ - def output_type(self, parent_output_type: type) -> type: - return parent_output_type + def compute_schema(self, schema: StageSchema): + for (port_idx, port_schema) in schema.input_schemas: + schema.output_schemas[port_idx].set_type(port_schema.get_type()) From c490ab9a8802d6225ccbfc4c4fd51c65712cccf1 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 2 Oct 2023 12:56:45 -0700 Subject: [PATCH 13/65] Replace output_type with compute_schema --- .../2_2_rabbitmq/rabbitmq_source_stage.py | 5 ++-- .../rabbitmq_source_stage.py | 5 ++-- .../dfp/stages/dfp_file_batcher_stage.py | 5 ++-- .../morpheus/dfp/stages/dfp_file_to_df.py | 5 ++-- .../dfp/stages/dfp_inference_stage.py | 5 ++-- .../dfp/stages/dfp_rolling_window_stage.py | 5 ++-- .../dfp/stages/dfp_split_users_stage.py | 5 ++-- .../morpheus/dfp/stages/dfp_training.py | 11 +++++---- .../morpheus/dfp/stages/multi_file_source.py | 5 ++-- .../stages/classification_stage.py | 5 ++-- .../stages/graph_construction_stage.py | 5 ++-- .../stages/graph_sage_stage.py | 5 ++-- examples/log_parsing/inference.py | 5 ++-- examples/log_parsing/postprocessing.py | 5 ++-- examples/sid_visualization/run.py | 5 ++-- morpheus/pipeline/multi_message_stage.py | 4 ++-- morpheus/pipeline/stage_schema.py | 1 + morpheus/pipeline/stream_wrapper.py | 4 ++-- .../stages/boundary/linear_boundary_stage.py | 23 ++++--------------- morpheus/stages/doca/doca_source_stage.py | 5 ++-- .../stages/general/linear_modules_stage.py | 5 ++-- .../general/multi_port_modules_stage.py | 6 +++-- morpheus/stages/inference/inference_stage.py | 5 ++-- .../stages/input/appshield_source_stage.py | 5 ++-- .../stages/input/autoencoder_source_stage.py | 5 ++-- .../control_message_file_source_stage.py | 5 ++-- .../control_message_kafka_source_stage.py | 5 ++-- morpheus/stages/input/file_source_stage.py | 5 ++-- .../stages/input/http_client_source_stage.py | 5 ++-- .../stages/input/http_server_source_stage.py | 5 ++-- .../stages/input/in_memory_source_stage.py | 5 ++-- morpheus/stages/input/kafka_source_stage.py | 5 ++-- morpheus/stages/input/rss_source_stage.py | 5 ++-- .../postprocess/filter_detections_stage.py | 7 +++--- .../stages/postprocess/serialize_stage.py | 5 ++-- .../preprocess/preprocess_base_stage.py | 5 ++-- morpheus/stages/preprocess/train_ae_stage.py | 5 ++-- tests/_utils/stages/conv_msg.py | 5 ++-- .../_utils/stages/in_memory_source_x_stage.py | 5 ++-- tests/_utils/stages/split_stage.py | 7 +++++- tests/benchmarks/static_message_source.py | 5 ++-- 41 files changed, 129 insertions(+), 99 deletions(-) diff --git a/examples/developer_guide/2_2_rabbitmq/rabbitmq_source_stage.py 
b/examples/developer_guide/2_2_rabbitmq/rabbitmq_source_stage.py index f15558191f..bd285a5eca 100644 --- a/examples/developer_guide/2_2_rabbitmq/rabbitmq_source_stage.py +++ b/examples/developer_guide/2_2_rabbitmq/rabbitmq_source_stage.py @@ -28,6 +28,7 @@ from morpheus.messages.message_meta import MessageMeta from morpheus.pipeline.preallocator_mixin import PreallocatorMixin from morpheus.pipeline.single_output_source import SingleOutputSource +from morpheus.pipeline.stage_schema import StageSchema logger = logging.getLogger(__name__) @@ -85,8 +86,8 @@ def name(self) -> str: def supports_cpp_node(self) -> bool: return False - def output_type(self) -> type: - return MessageMeta + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(MessageMeta) def stop(self): # Indicate we need to stop diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/rabbitmq_source_stage.py b/examples/developer_guide/4_rabbitmq_cpp_stage/rabbitmq_source_stage.py index 17004dc339..6845be54b4 100755 --- a/examples/developer_guide/4_rabbitmq_cpp_stage/rabbitmq_source_stage.py +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/rabbitmq_source_stage.py @@ -28,6 +28,7 @@ from morpheus.messages.message_meta import MessageMeta from morpheus.pipeline.preallocator_mixin import PreallocatorMixin from morpheus.pipeline.single_output_source import SingleOutputSource +from morpheus.pipeline.stage_schema import StageSchema logger = logging.getLogger(__name__) @@ -81,8 +82,8 @@ def name(self) -> str: def supports_cpp_node(self) -> bool: return True - def output_type(self) -> type: - return MessageMeta + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(MessageMeta) def _build_source(self, builder: mrc.Builder) -> mrc.SegmentObject: if self._build_cpp_node(): diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_file_batcher_stage.py b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_file_batcher_stage.py index 9a065dde46..a3c61d149f 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_file_batcher_stage.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_file_batcher_stage.py @@ -26,6 +26,7 @@ from morpheus.config import Config from morpheus.pipeline.single_port_stage import SinglePortStage +from morpheus.pipeline.stage_schema import StageSchema logger = logging.getLogger(f"morpheus.{__name__}") @@ -107,8 +108,8 @@ def accepted_types(self) -> typing.Tuple: """Accepted incoming types for this stage""" return (fsspec.core.OpenFiles, ) - def output_type(self, parent_output_type: type) -> type: - return typing.Tuple[fsspec.core.OpenFiles, int] + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(typing.Tuple[fsspec.core.OpenFiles, int]) def on_data(self, file_objects: fsspec.core.OpenFiles) -> typing.List[typing.Tuple[fsspec.core.OpenFiles, int]]: """ diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_file_to_df.py b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_file_to_df.py index eb1d263cdc..b482cbaf2e 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_file_to_df.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_file_to_df.py @@ -25,6 +25,7 @@ from morpheus.controllers.file_to_df_controller import FileToDFController from morpheus.pipeline.preallocator_mixin import PreallocatorMixin from morpheus.pipeline.single_port_stage import SinglePortStage +from 
morpheus.pipeline.stage_schema import StageSchema from morpheus.utils.column_info import DataFrameInputSchema logger = logging.getLogger(f"morpheus.{__name__}") @@ -83,8 +84,8 @@ def accepted_types(self) -> typing.Tuple: """Accepted input types.""" return (typing.Any, ) - def output_type(self, parent_output_type: type) -> type: - return pd.DataFrame + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(pd.DataFrame) def _build_single(self, builder: mrc.Builder, input_node: mrc.SegmentObject) -> mrc.SegmentObject: node = builder.make_node(self.unique_name, diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_inference_stage.py b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_inference_stage.py index bee7759506..79b6ea2da2 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_inference_stage.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_inference_stage.py @@ -24,6 +24,7 @@ from morpheus.config import Config from morpheus.messages.multi_ae_message import MultiAEMessage from morpheus.pipeline.single_port_stage import SinglePortStage +from morpheus.pipeline.stage_schema import StageSchema from ..messages.multi_dfp_message import MultiDFPMessage from ..utils.model_cache import ModelCache @@ -71,8 +72,8 @@ def accepted_types(self) -> typing.Tuple: """Accepted input types.""" return (MultiDFPMessage, ) - def output_type(self, parent_output_type: type) -> type: - return MultiAEMessage + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(MultiAEMessage) def get_model(self, user: str) -> ModelCache: """ diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_rolling_window_stage.py b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_rolling_window_stage.py index d6e45223c9..775853640b 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_rolling_window_stage.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_rolling_window_stage.py @@ -24,6 +24,7 @@ from morpheus.config import Config from morpheus.pipeline.single_port_stage import SinglePortStage +from morpheus.pipeline.stage_schema import StageSchema from ..messages.multi_dfp_message import DFPMessageMeta from ..messages.multi_dfp_message import MultiDFPMessage @@ -90,8 +91,8 @@ def accepted_types(self) -> typing.Tuple: """Input types accepted by this stage.""" return (DFPMessageMeta, ) - def output_type(self, parent_output_type: type) -> type: - return MultiDFPMessage + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(MultiDFPMessage) @contextmanager def _get_user_cache(self, user_id: str) -> typing.Generator[CachedUserWindow, None, None]: diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_split_users_stage.py b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_split_users_stage.py index ff6bc04c23..0c27e3c58d 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_split_users_stage.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_split_users_stage.py @@ -25,6 +25,7 @@ from morpheus.config import Config from morpheus.pipeline.single_port_stage import SinglePortStage +from morpheus.pipeline.stage_schema import StageSchema from morpheus.utils.type_aliases import DataFrameType from ..messages.multi_dfp_message import DFPMessageMeta @@ -84,8 +85,8 @@ def accepted_types(self) -> 
typing.Tuple: """Input types accepted by this stage.""" return (cudf.DataFrame, pd.DataFrame) - def output_type(self, parent_output_type: type) -> type: - return DFPMessageMeta + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(DFPMessageMeta) def extract_users(self, message: DataFrameType) -> typing.List[DFPMessageMeta]: """ diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_training.py b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_training.py index 15531907b5..a346d1f4a7 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_training.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp/stages/dfp_training.py @@ -26,6 +26,7 @@ from morpheus.messages.multi_ae_message import MultiAEMessage from morpheus.models.dfencoder import AutoEncoder from morpheus.pipeline.single_port_stage import SinglePortStage +from morpheus.pipeline.stage_schema import StageSchema from ..messages.multi_dfp_message import DFPMessageMeta from ..messages.multi_dfp_message import MultiDFPMessage @@ -94,11 +95,11 @@ def accepted_types(self) -> typing.Tuple: MultiDFPMessage, ) - def output_type(self, parent_output_type: type) -> type: - return_type = parent_output_type - if (return_type == MultiDFPMessage): - return_type = MultiAEMessage - return return_type + def compute_schema(self, schema: StageSchema): + output_type = schema.input_type + if (output_type == MultiDFPMessage): + output_type = MultiAEMessage + schema.output_schema.set_type(output_type) def _dfp_multimessage_from_control_message(self, control_message: ControlMessage) -> typing.Union[MultiDFPMessage, None]: diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/stages/multi_file_source.py b/examples/digital_fingerprinting/production/morpheus/dfp/stages/multi_file_source.py index b4d3482a4a..aad79d33c7 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/stages/multi_file_source.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp/stages/multi_file_source.py @@ -23,6 +23,7 @@ from morpheus.config import Config from morpheus.pipeline.single_output_source import SingleOutputSource +from morpheus.pipeline.stage_schema import StageSchema logger = logging.getLogger(f"morpheus.{__name__}") @@ -76,8 +77,8 @@ def input_count(self) -> int: """Return None for no max intput count""" return self._input_count - def output_type(self) -> type: - return fsspec.core.OpenFiles + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(fsspec.core.OpenFiles) def supports_cpp_node(self): """Indicates whether this stage supports C++ nodes.""" diff --git a/examples/gnn_fraud_detection_pipeline/stages/classification_stage.py b/examples/gnn_fraud_detection_pipeline/stages/classification_stage.py index c25d860144..56e853303c 100644 --- a/examples/gnn_fraud_detection_pipeline/stages/classification_stage.py +++ b/examples/gnn_fraud_detection_pipeline/stages/classification_stage.py @@ -24,6 +24,7 @@ from morpheus.config import PipelineModes from morpheus.messages import MultiMessage from morpheus.pipeline.single_port_stage import SinglePortStage +from morpheus.pipeline.stage_schema import StageSchema from .graph_sage_stage import GraphSAGEMultiMessage @@ -55,8 +56,8 @@ def name(self) -> str: def accepted_types(self) -> (GraphSAGEMultiMessage, ): return (GraphSAGEMultiMessage, ) - def output_type(self, parent_output_type: type) -> type: - return MultiMessage + def compute_schema(self, schema: StageSchema): 
+ schema.output_schema.set_type(MultiMessage) def supports_cpp_node(self) -> bool: return False diff --git a/examples/gnn_fraud_detection_pipeline/stages/graph_construction_stage.py b/examples/gnn_fraud_detection_pipeline/stages/graph_construction_stage.py index ea5e970201..81bf2211d4 100644 --- a/examples/gnn_fraud_detection_pipeline/stages/graph_construction_stage.py +++ b/examples/gnn_fraud_detection_pipeline/stages/graph_construction_stage.py @@ -29,6 +29,7 @@ from morpheus.messages import MultiMessage from morpheus.messages.message_meta import MessageMeta from morpheus.pipeline.single_port_stage import SinglePortStage +from morpheus.pipeline.stage_schema import StageSchema from .model import build_fsi_graph from .model import prepare_data @@ -77,8 +78,8 @@ def name(self) -> str: def accepted_types(self) -> (MultiMessage, ): return (MultiMessage, ) - def output_type(self, parent_output_type: type) -> type: - return FraudGraphMultiMessage + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(FraudGraphMultiMessage) def supports_cpp_node(self) -> bool: return False diff --git a/examples/gnn_fraud_detection_pipeline/stages/graph_sage_stage.py b/examples/gnn_fraud_detection_pipeline/stages/graph_sage_stage.py index b82d3e2287..3dc9f56c6a 100644 --- a/examples/gnn_fraud_detection_pipeline/stages/graph_sage_stage.py +++ b/examples/gnn_fraud_detection_pipeline/stages/graph_sage_stage.py @@ -26,6 +26,7 @@ from morpheus.messages import MultiMessage from morpheus.messages.message_meta import MessageMeta from morpheus.pipeline.single_port_stage import SinglePortStage +from morpheus.pipeline.stage_schema import StageSchema from .graph_construction_stage import FraudGraphMultiMessage from .model import load_model @@ -72,8 +73,8 @@ def name(self) -> str: def accepted_types(self) -> (FraudGraphMultiMessage, ): return (FraudGraphMultiMessage, ) - def output_type(self, parent_output_type: type) -> type: - return GraphSAGEMultiMessage + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(GraphSAGEMultiMessage) def supports_cpp_node(self) -> bool: return False diff --git a/examples/log_parsing/inference.py b/examples/log_parsing/inference.py index ab9f9f9804..f298cbce64 100644 --- a/examples/log_parsing/inference.py +++ b/examples/log_parsing/inference.py @@ -30,6 +30,7 @@ from morpheus.config import Config from morpheus.config import PipelineModes from morpheus.messages import MultiInferenceMessage +from morpheus.pipeline.stage_schema import StageSchema from morpheus.stages.inference.inference_stage import InferenceStage from morpheus.stages.inference.inference_stage import InferenceWorker from morpheus.stages.inference.triton_inference_stage import _TritonInferenceWorker @@ -174,8 +175,8 @@ def supports_cpp_node(self): # Get the value from the worker class return False - def output_type(self, parent_output_type: type) -> type: - return MultiPostprocLogParsingMessage + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(MultiPostprocLogParsingMessage) def _build_single(self, builder: mrc.Builder, input_node: mrc.SegmentObject) -> mrc.SegmentObject: diff --git a/examples/log_parsing/postprocessing.py b/examples/log_parsing/postprocessing.py index d32dd40b4f..c36d7c897e 100644 --- a/examples/log_parsing/postprocessing.py +++ b/examples/log_parsing/postprocessing.py @@ -28,6 +28,7 @@ from morpheus.config import PipelineModes from morpheus.messages import MessageMeta from morpheus.pipeline.single_port_stage import 
SinglePortStage +from morpheus.pipeline.stage_schema import StageSchema @register_stage("log-postprocess", modes=[PipelineModes.NLP]) @@ -74,8 +75,8 @@ def supports_cpp_node(self): def accepted_types(self) -> typing.Tuple: return (MultiPostprocLogParsingMessage, ) - def output_type(self, parent_output_type: type) -> type: - return MessageMeta + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(MessageMeta) def _postprocess(self, x: MultiPostprocLogParsingMessage): diff --git a/examples/sid_visualization/run.py b/examples/sid_visualization/run.py index d2a2376a8f..2a64dfbd20 100644 --- a/examples/sid_visualization/run.py +++ b/examples/sid_visualization/run.py @@ -28,6 +28,7 @@ from morpheus.pipeline.linear_pipeline import LinearPipeline from morpheus.pipeline.preallocator_mixin import PreallocatorMixin from morpheus.pipeline.single_output_source import SingleOutputSource +from morpheus.pipeline.stage_schema import StageSchema from morpheus.stages.general.monitor_stage import MonitorStage from morpheus.stages.inference.triton_inference_stage import TritonInferenceStage from morpheus.stages.postprocess.add_classifications_stage import AddClassificationsStage @@ -84,8 +85,8 @@ def input_count(self) -> int: """Return None for no max intput count""" return self._input_count - def output_type(self) -> type: - return MessageMeta + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(MessageMeta) def supports_cpp_node(self): return False diff --git a/morpheus/pipeline/multi_message_stage.py b/morpheus/pipeline/multi_message_stage.py index 9b9d2dcd7c..a9d49f9825 100644 --- a/morpheus/pipeline/multi_message_stage.py +++ b/morpheus/pipeline/multi_message_stage.py @@ -45,8 +45,8 @@ def __init__(self, c: Config): super().__init__(c) - def output_type(self, parent_output_type: type) -> type: - return MultiMessage + def compute_schema(self, schema: _pipeline.StageSchema): + schema.output_schema.set_type(MultiMessage) def _post_build_single(self, builder: mrc.Builder, out_node: mrc.SegmentObject) -> mrc.SegmentObject: diff --git a/morpheus/pipeline/stage_schema.py b/morpheus/pipeline/stage_schema.py index 6bac038a6e..f1c261cd81 100644 --- a/morpheus/pipeline/stage_schema.py +++ b/morpheus/pipeline/stage_schema.py @@ -98,6 +98,7 @@ def output_schema(self) -> PortSchema: assert len(self._output_schemas) == 1, \ "Attempted to access output_schema property on StageSchema with multiple outputs" return self._output_schemas[0] + def complete(self): """ diff --git a/morpheus/pipeline/stream_wrapper.py b/morpheus/pipeline/stream_wrapper.py index 764cfc505b..42dabd51ba 100644 --- a/morpheus/pipeline/stream_wrapper.py +++ b/morpheus/pipeline/stream_wrapper.py @@ -344,8 +344,8 @@ def _pre_build(self, do_propagate: bool = True): self.compute_schema(schema) assert len(schema.output_schemas) == len(self.output_ports), \ - (f"Prebuild expected `output_types()` to return {len(self.output_ports)} types (one for each output port), " - f"but got {len(schema.output_schemas)}.") + (f"Prebuild expected `schema.output_schemas` to be of length {len(self.output_ports)} " + f"(one for each output port), but got {len(schema.output_schemas)}.") schema.complete() diff --git a/morpheus/stages/boundary/linear_boundary_stage.py b/morpheus/stages/boundary/linear_boundary_stage.py index a32088a6ce..14d1db1858 100644 --- a/morpheus/stages/boundary/linear_boundary_stage.py +++ b/morpheus/stages/boundary/linear_boundary_stage.py @@ -19,14 +19,16 @@ from mrc.core import operators as ops 
from morpheus.config import Config +from morpheus.pipeline.pass_thru_type_mixin import PassThruTypeMixin from morpheus.pipeline.preallocator_mixin import PreallocatorMixin from morpheus.pipeline.single_output_source import SingleOutputSource from morpheus.pipeline.single_port_stage import SinglePortStage +from morpheus.pipeline.stage_schema import StageSchema logger = logging.getLogger(__name__) -class LinearBoundaryEgressStage(SinglePortStage): +class LinearBoundaryEgressStage(PassThruTypeMixin, SinglePortStage): """ The LinearBoundaryEgressStage acts as an egress point from one linear segment to another. Given an existing linear pipeline that we want to connect to another segment, a linear boundary egress stage would be added, in conjunction @@ -69,9 +71,6 @@ def accepted_types(self) -> typing.Tuple: """ return (self._output_type, ) - def output_type(self, parent_output_type: type) -> type: - return parent_output_type - def supports_cpp_node(self): return False @@ -113,20 +112,8 @@ def __init__(self, c: Config, boundary_port_id: str, data_type=None): def name(self) -> str: return "segment_boundary_ingress" - def accepted_types(self) -> typing.Tuple: - """ - Accepted input types for this stage are returned. - - Returns - ------- - typing.Tuple - Accepted input types. - - """ - return (self._output_type, ) - - def output_type(self) -> type: - return self._output_type + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(self._output_type) def supports_cpp_node(self): return False diff --git a/morpheus/stages/doca/doca_source_stage.py b/morpheus/stages/doca/doca_source_stage.py index c95c27bc36..f125a505f0 100644 --- a/morpheus/stages/doca/doca_source_stage.py +++ b/morpheus/stages/doca/doca_source_stage.py @@ -22,6 +22,7 @@ from morpheus.messages import MessageMeta from morpheus.pipeline.preallocator_mixin import PreallocatorMixin from morpheus.pipeline.single_output_source import SingleOutputSource +from morpheus.pipeline.stage_schema import StageSchema logger = logging.getLogger(__name__) @@ -76,8 +77,8 @@ def input_count(self) -> int: """Return None for no max input count""" return None - def output_type(self) -> type: - return MessageMeta + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(MessageMeta) def supports_cpp_node(self): return True diff --git a/morpheus/stages/general/linear_modules_stage.py b/morpheus/stages/general/linear_modules_stage.py index 818c7358fd..8ff32fbd9d 100644 --- a/morpheus/stages/general/linear_modules_stage.py +++ b/morpheus/stages/general/linear_modules_stage.py @@ -19,6 +19,7 @@ from morpheus.config import Config from morpheus.pipeline.single_port_stage import SinglePortStage +from morpheus.pipeline.stage_schema import StageSchema from morpheus.utils.module_utils import load_module logger = logging.getLogger(__name__) @@ -87,8 +88,8 @@ def accepted_types(self) -> typing.Tuple: """ return (self._input_type, ) - def output_type(self, parent_output_type: type) -> type: - return self._output_type + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(self._output_type) def _get_cpp_module_node(self, builder: mrc.Builder) -> mrc.SegmentObject: raise NotImplementedError("No C++ node is available for this module type") diff --git a/morpheus/stages/general/multi_port_modules_stage.py b/morpheus/stages/general/multi_port_modules_stage.py index 694915e724..a6336c91aa 100644 --- a/morpheus/stages/general/multi_port_modules_stage.py +++ 
b/morpheus/stages/general/multi_port_modules_stage.py @@ -20,6 +20,7 @@ from morpheus.config import Config from morpheus.pipeline.stage import Stage +from morpheus.pipeline.stage_schema import StageSchema from morpheus.utils.module_utils import load_module logger = logging.getLogger(__name__) @@ -98,8 +99,9 @@ def accepted_types(self) -> typing.Tuple: """ return (typing.Any, ) - def output_types(self, parent_output_types: list[type]) -> list[type]: - return [self._ouput_type for _ in range(self._num_out_ports)] + def compute_schema(self, schema: StageSchema): + for port_schema in schema.output_schemas: + port_schema.set_type(self._ouput_type) def _validate_ports(self, module) -> None: diff --git a/morpheus/stages/inference/inference_stage.py b/morpheus/stages/inference/inference_stage.py index 4051a39d8a..e68eb6e721 100644 --- a/morpheus/stages/inference/inference_stage.py +++ b/morpheus/stages/inference/inference_stage.py @@ -26,6 +26,7 @@ from morpheus.messages import MultiResponseMessage from morpheus.messages.memory.tensor_memory import TensorMemory from morpheus.pipeline.multi_message_stage import MultiMessageStage +from morpheus.pipeline.stage_schema import StageSchema from morpheus.utils.producer_consumer_queue import ProducerConsumerQueue @@ -181,8 +182,8 @@ def accepted_types(self) -> typing.Tuple: """ return (MultiInferenceMessage, ) - def output_type(self, parent_output_type: type) -> type: - return MultiResponseMessage + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(MultiResponseMessage) def supports_cpp_node(self): # Default to False unless derived classes override this value diff --git a/morpheus/stages/input/appshield_source_stage.py b/morpheus/stages/input/appshield_source_stage.py index 46f7807946..ea3858d8fb 100644 --- a/morpheus/stages/input/appshield_source_stage.py +++ b/morpheus/stages/input/appshield_source_stage.py @@ -30,6 +30,7 @@ from morpheus.messages.message_meta import AppShieldMessageMeta from morpheus.pipeline import SingleOutputSource from morpheus.pipeline.preallocator_mixin import PreallocatorMixin +from morpheus.pipeline.stage_schema import StageSchema from morpheus.utils.directory_watcher import DirectoryWatcher logger = logging.getLogger(__name__) @@ -121,8 +122,8 @@ def input_count(self) -> int: def supports_cpp_node(self): return False - def output_type(self) -> type: - return AppShieldMessageMeta + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(AppShieldMessageMeta) @staticmethod def fill_interested_cols(plugin_df: pd.DataFrame, cols_include: typing.List[str]): diff --git a/morpheus/stages/input/autoencoder_source_stage.py b/morpheus/stages/input/autoencoder_source_stage.py index fa953f0208..fd0bc325ae 100644 --- a/morpheus/stages/input/autoencoder_source_stage.py +++ b/morpheus/stages/input/autoencoder_source_stage.py @@ -27,6 +27,7 @@ from morpheus.messages import UserMessageMeta from morpheus.pipeline.preallocator_mixin import PreallocatorMixin from morpheus.pipeline.single_output_source import SingleOutputSource +from morpheus.pipeline.stage_schema import StageSchema from morpheus.utils.directory_watcher import DirectoryWatcher logger = logging.getLogger(__name__) @@ -112,8 +113,8 @@ def input_count(self) -> int: """Return None for no max input count""" return self._input_count if self._input_count is not None else 0 - def output_type(self) -> type: - return UserMessageMeta + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(UserMessageMeta) def 
get_match_pattern(self, glob_split): """Return a file match pattern""" diff --git a/morpheus/stages/input/control_message_file_source_stage.py b/morpheus/stages/input/control_message_file_source_stage.py index 4f6c1d2a6c..173d7998ee 100644 --- a/morpheus/stages/input/control_message_file_source_stage.py +++ b/morpheus/stages/input/control_message_file_source_stage.py @@ -23,6 +23,7 @@ from morpheus.config import Config from morpheus.messages import ControlMessage from morpheus.pipeline.single_output_source import SingleOutputSource +from morpheus.pipeline.stage_schema import StageSchema logger = logging.getLogger(f"morpheus.{__name__}") @@ -49,8 +50,8 @@ def __init__(self, c: Config, filenames: typing.List[str]): def name(self) -> str: return "from-message-control" - def output_type(self) -> type: - return fsspec.core.OpenFiles + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(fsspec.core.OpenFiles) def supports_cpp_node(self): return True diff --git a/morpheus/stages/input/control_message_kafka_source_stage.py b/morpheus/stages/input/control_message_kafka_source_stage.py index e1dff22890..9413e68966 100644 --- a/morpheus/stages/input/control_message_kafka_source_stage.py +++ b/morpheus/stages/input/control_message_kafka_source_stage.py @@ -27,6 +27,7 @@ from morpheus.messages import ControlMessage from morpheus.pipeline.preallocator_mixin import PreallocatorMixin from morpheus.pipeline.single_output_source import SingleOutputSource +from morpheus.pipeline.stage_schema import StageSchema from morpheus.stages.input.kafka_source_stage import AutoOffsetReset logger = logging.getLogger(__name__) @@ -125,8 +126,8 @@ def name(self) -> str: def supports_cpp_node(self): return False - def output_type(self) -> type: - return ControlMessage + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(ControlMessage) def _process_msg(self, consumer, msg): control_messages = [] diff --git a/morpheus/stages/input/file_source_stage.py b/morpheus/stages/input/file_source_stage.py index 14a55df104..d05c8c2190 100644 --- a/morpheus/stages/input/file_source_stage.py +++ b/morpheus/stages/input/file_source_stage.py @@ -27,6 +27,7 @@ from morpheus.messages import MessageMeta from morpheus.pipeline.preallocator_mixin import PreallocatorMixin from morpheus.pipeline.single_output_source import SingleOutputSource +from morpheus.pipeline.stage_schema import StageSchema logger = logging.getLogger(__name__) @@ -100,8 +101,8 @@ def supports_cpp_node(self) -> bool: """Indicates whether or not this stage supports a C++ node""" return True - def output_type(self) -> type: - return MessageMeta + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(MessageMeta) def _build_source(self, builder: mrc.Builder) -> mrc.SegmentObject: diff --git a/morpheus/stages/input/http_client_source_stage.py b/morpheus/stages/input/http_client_source_stage.py index 637709930b..b6b8b833d3 100644 --- a/morpheus/stages/input/http_client_source_stage.py +++ b/morpheus/stages/input/http_client_source_stage.py @@ -28,6 +28,7 @@ from morpheus.messages import MessageMeta from morpheus.pipeline.preallocator_mixin import PreallocatorMixin from morpheus.pipeline.single_output_source import SingleOutputSource +from morpheus.pipeline.stage_schema import StageSchema from morpheus.utils import http_utils logger = logging.getLogger(__name__) @@ -145,8 +146,8 @@ def supports_cpp_node(self) -> bool: """Indicates whether or not this stage supports a C++ implementation""" return False 
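The source-stage hunks above all follow one pattern: with no input ports to consult, `compute_schema()` simply declares the emitted type. A minimal sketch; the class is hypothetical, and the fixed `MessageMeta` output mirrors the sources in these hunks:

    from morpheus.messages import MessageMeta
    from morpheus.pipeline.single_output_source import SingleOutputSource
    from morpheus.pipeline.stage_schema import StageSchema


    class ExampleSource(SingleOutputSource):
        # Hypothetical source, shown only to illustrate the hook; a real
        # source must also implement name, supports_cpp_node() and
        # _build_source().

        def compute_schema(self, schema: StageSchema):
            # A source has no input schemas; it only declares the type of
            # its single output port.
            schema.output_schema.set_type(MessageMeta)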
- def output_type(self) -> type: - return MessageMeta + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(MessageMeta) def _parse_response(self, response: requests.Response) -> typing.Union[cudf.DataFrame, None]: """ diff --git a/morpheus/stages/input/http_server_source_stage.py b/morpheus/stages/input/http_server_source_stage.py index ee0d25749b..9aa8df0be4 100644 --- a/morpheus/stages/input/http_server_source_stage.py +++ b/morpheus/stages/input/http_server_source_stage.py @@ -29,6 +29,7 @@ from morpheus.messages import MessageMeta from morpheus.pipeline.preallocator_mixin import PreallocatorMixin from morpheus.pipeline.single_output_source import SingleOutputSource +from morpheus.pipeline.stage_schema import StageSchema from morpheus.utils.http_utils import HTTPMethod from morpheus.utils.http_utils import HttpParseResponse from morpheus.utils.http_utils import MimeTypes @@ -128,8 +129,8 @@ def supports_cpp_node(self) -> bool: """Indicates whether or not this stage supports C++ nodes.""" return True - def output_type(self) -> type: - return MessageMeta + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(MessageMeta) def _parse_payload(self, payload: str) -> HttpParseResponse: try: diff --git a/morpheus/stages/input/in_memory_source_stage.py b/morpheus/stages/input/in_memory_source_stage.py index 540b95ce8d..18da2deb18 100644 --- a/morpheus/stages/input/in_memory_source_stage.py +++ b/morpheus/stages/input/in_memory_source_stage.py @@ -22,6 +22,7 @@ from morpheus.messages import MessageMeta from morpheus.pipeline.preallocator_mixin import PreallocatorMixin from morpheus.pipeline.single_output_source import SingleOutputSource +from morpheus.pipeline.stage_schema import StageSchema class InMemorySourceStage(PreallocatorMixin, SingleOutputSource): @@ -51,8 +52,8 @@ def name(self) -> str: def supports_cpp_node(self) -> bool: return False - def output_type(self) -> type: - return MessageMeta + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(MessageMeta) def _generate_frames(self) -> typing.Iterator[MessageMeta]: for i in range(self._repeat_count): diff --git a/morpheus/stages/input/kafka_source_stage.py b/morpheus/stages/input/kafka_source_stage.py index ff00097ecd..f77f758909 100644 --- a/morpheus/stages/input/kafka_source_stage.py +++ b/morpheus/stages/input/kafka_source_stage.py @@ -31,6 +31,7 @@ from morpheus.messages import MessageMeta from morpheus.pipeline.preallocator_mixin import PreallocatorMixin from morpheus.pipeline.single_output_source import SingleOutputSource +from morpheus.pipeline.stage_schema import StageSchema logger = logging.getLogger(__name__) @@ -136,8 +137,8 @@ def name(self) -> str: def supports_cpp_node(self): return True - def output_type(self) -> type: - return MessageMeta + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(MessageMeta) def stop(self): """ diff --git a/morpheus/stages/input/rss_source_stage.py b/morpheus/stages/input/rss_source_stage.py index faf86de603..e02061f778 100644 --- a/morpheus/stages/input/rss_source_stage.py +++ b/morpheus/stages/input/rss_source_stage.py @@ -23,6 +23,7 @@ from morpheus.messages import MessageMeta from morpheus.pipeline.preallocator_mixin import PreallocatorMixin from morpheus.pipeline.single_output_source import SingleOutputSource +from morpheus.pipeline.stage_schema import StageSchema logger = logging.getLogger(__name__) @@ -75,8 +76,8 @@ def stop(self): def supports_cpp_node(self): return False - def 
output_type(self) -> type: - return MessageMeta + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(MessageMeta) def _fetch_feeds(self) -> MessageMeta: """ diff --git a/morpheus/stages/postprocess/filter_detections_stage.py b/morpheus/stages/postprocess/filter_detections_stage.py index 2eeeb6b655..261353732e 100644 --- a/morpheus/stages/postprocess/filter_detections_stage.py +++ b/morpheus/stages/postprocess/filter_detections_stage.py @@ -26,6 +26,7 @@ from morpheus.messages import MultiMessage from morpheus.messages import MultiResponseMessage from morpheus.pipeline.single_port_stage import SinglePortStage +from morpheus.pipeline.stage_schema import StageSchema logger = logging.getLogger(__name__) @@ -106,9 +107,9 @@ def accepted_types(self) -> typing.Tuple: return (MultiMessage, ) - def output_type(self, parent_output_type: type) -> type: - self._controller.update_filter_source(message_type=parent_output_type) - return parent_output_type + def compute_schema(self, schema: StageSchema): + self._controller.update_filter_source(message_type=schema.input_type) + schema.output_schema.set_type(schema.input_type) def supports_cpp_node(self): # Enable support by default diff --git a/morpheus/stages/postprocess/serialize_stage.py b/morpheus/stages/postprocess/serialize_stage.py index 7a45c29438..fd61dc101b 100644 --- a/morpheus/stages/postprocess/serialize_stage.py +++ b/morpheus/stages/postprocess/serialize_stage.py @@ -25,6 +25,7 @@ from morpheus.messages import MessageMeta from morpheus.messages import MultiMessage from morpheus.pipeline.single_port_stage import SinglePortStage +from morpheus.pipeline.stage_schema import StageSchema @register_stage("serialize") @@ -78,8 +79,8 @@ def accepted_types(self) -> typing.Tuple: """ return (MultiMessage, ) - def output_type(self, parent_output_type: type) -> type: - return MessageMeta + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(MessageMeta) def supports_cpp_node(self): # Enable support by default diff --git a/morpheus/stages/preprocess/preprocess_base_stage.py b/morpheus/stages/preprocess/preprocess_base_stage.py index 5d5713e0b3..eee392d413 100644 --- a/morpheus/stages/preprocess/preprocess_base_stage.py +++ b/morpheus/stages/preprocess/preprocess_base_stage.py @@ -24,6 +24,7 @@ from morpheus.messages import MultiInferenceMessage from morpheus.messages import MultiMessage from morpheus.pipeline.multi_message_stage import MultiMessageStage +from morpheus.pipeline.stage_schema import StageSchema class PreprocessBaseStage(MultiMessageStage): @@ -50,7 +51,7 @@ def accepted_types(self) -> typing.Tuple: """ return (MultiMessage, ) - def output_type(self, parent_output_type: type) -> type: + def compute_schema(self, schema: StageSchema): out_type = MultiInferenceMessage self._preprocess_fn = self._get_preprocess_fn() @@ -61,7 +62,7 @@ def output_type(self, parent_output_type: type) -> type: and typing_utils.issubtype(preproc_sig.return_annotation, MultiInferenceMessage)): out_type = preproc_sig.return_annotation - return out_type + schema.output_schema.set_type(out_type) @abstractmethod def _get_preprocess_fn(self) -> typing.Callable[[MultiMessage], MultiInferenceMessage]: diff --git a/morpheus/stages/preprocess/train_ae_stage.py b/morpheus/stages/preprocess/train_ae_stage.py index 4e1b72e3c2..9f66c16123 100644 --- a/morpheus/stages/preprocess/train_ae_stage.py +++ b/morpheus/stages/preprocess/train_ae_stage.py @@ -30,6 +30,7 @@ from morpheus.messages.multi_ae_message import MultiAEMessage 
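Stages whose output depends on the upstream type now read it from the schema rather than receiving a `parent_output_type` argument, as the `filter_detections_stage.py` hunk above does. The essential pattern, as a sketch of just the hook (imports as in the hunks above):

    def compute_schema(self, schema: StageSchema):
        # schema.input_type is the upstream type already resolved by the
        # receiver; echoing it back keeps the stage type-transparent.
        schema.output_schema.set_type(schema.input_type)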
from morpheus.models.dfencoder import AutoEncoder from morpheus.pipeline.multi_message_stage import MultiMessageStage +from morpheus.pipeline.stage_schema import StageSchema from morpheus.utils.seed import manual_seed logger = logging.getLogger(__name__) @@ -208,8 +209,8 @@ def accepted_types(self) -> typing.Tuple: """ return (UserMessageMeta, ) - def output_type(self, parent_output_type: type) -> type: - return MultiAEMessage + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(MultiAEMessage) def supports_cpp_node(self): return False diff --git a/tests/_utils/stages/conv_msg.py b/tests/_utils/stages/conv_msg.py index 7175bacc6d..6da9ef1c19 100755 --- a/tests/_utils/stages/conv_msg.py +++ b/tests/_utils/stages/conv_msg.py @@ -28,6 +28,7 @@ from morpheus.messages import MultiResponseMessage from morpheus.messages import ResponseMemory from morpheus.pipeline.single_port_stage import SinglePortStage +from morpheus.pipeline.stage_schema import StageSchema @register_stage("unittest-conv-msg", ignore_args=["expected_data"]) @@ -68,8 +69,8 @@ def name(self) -> str: def accepted_types(self) -> typing.Tuple: return (MultiMessage, ) - def output_type(self, parent_output_type: type) -> type: - return MultiResponseMessage + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(MultiResponseMessage) def supports_cpp_node(self) -> bool: return False diff --git a/tests/_utils/stages/in_memory_source_x_stage.py b/tests/_utils/stages/in_memory_source_x_stage.py index 6fde686bc6..f2ab6a6e59 100644 --- a/tests/_utils/stages/in_memory_source_x_stage.py +++ b/tests/_utils/stages/in_memory_source_x_stage.py @@ -19,6 +19,7 @@ from morpheus.config import Config from morpheus.pipeline.single_output_source import SingleOutputSource +from morpheus.pipeline.stage_schema import StageSchema class InMemSourceXStage(SingleOutputSource): @@ -38,8 +39,8 @@ def name(self) -> str: def supports_cpp_node(self) -> bool: return False - def output_type(self) -> type: - return type(self._data[0]) + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(type(self._data[0])) def _emit_data(self) -> typing.Iterator[typing.Any]: for x in self._data: diff --git a/tests/_utils/stages/split_stage.py b/tests/_utils/stages/split_stage.py index 29cc07fc04..143de10872 100644 --- a/tests/_utils/stages/split_stage.py +++ b/tests/_utils/stages/split_stage.py @@ -21,6 +21,7 @@ from morpheus.config import Config from morpheus.messages import MessageMeta from morpheus.pipeline.stage import Stage +from morpheus.pipeline.stage_schema import StageSchema class SplitStage(Stage): @@ -37,7 +38,11 @@ def name(self) -> str: def supports_cpp_node(self): return False - def output_types(self, parent_output_types: list[type]) -> list[type]: + def compute_schema(self, schema: StageSchema): + assert len(schema.output_schemas) == 2, "Expected two output schemas" + for port_schema in schema.output_schemas: + port_schema.set_type(MessageMeta) + return [MessageMeta, MessageMeta] def _build(self, builder: mrc.Builder, input_nodes: list[mrc.SegmentObject]) -> list[mrc.SegmentObject]: diff --git a/tests/benchmarks/static_message_source.py b/tests/benchmarks/static_message_source.py index b54146b2c6..076f865e41 100644 --- a/tests/benchmarks/static_message_source.py +++ b/tests/benchmarks/static_message_source.py @@ -20,6 +20,7 @@ from morpheus.config import Config from morpheus.messages import MessageMeta from morpheus.pipeline import SingleOutputSource +from morpheus.pipeline.stage_schema 
import StageSchema class StaticMessageSource(SingleOutputSource): @@ -41,8 +42,8 @@ def supports_cpp_node(self): def input_count(self) -> int: return len(self._df) - def output_type(self) -> type: - return MessageMeta + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(MessageMeta) def _build_source(self, builder: mrc.Builder) -> mrc.SegmentObject: return builder.make_source(self.unique_name, self._generate_frames()) From e455b961b25a59ec68420ea5f58536fa6b1004af Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 2 Oct 2023 14:01:53 -0700 Subject: [PATCH 14/65] Fixing type-os --- morpheus/pipeline/pass_thru_type_mixin.py | 2 +- morpheus/pipeline/preallocator_mixin.py | 2 +- morpheus/pipeline/stage_schema.py | 6 ++++-- morpheus/pipeline/stream_wrapper.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/morpheus/pipeline/pass_thru_type_mixin.py b/morpheus/pipeline/pass_thru_type_mixin.py index fe47052239..9033ccdb68 100644 --- a/morpheus/pipeline/pass_thru_type_mixin.py +++ b/morpheus/pipeline/pass_thru_type_mixin.py @@ -24,5 +24,5 @@ class PassThruTypeMixin(ABC): """ def compute_schema(self, schema: StageSchema): - for (port_idx, port_schema) in schema.input_schemas: + for (port_idx, port_schema) in enumerate(schema.input_schemas): schema.output_schemas[port_idx].set_type(port_schema.get_type()) diff --git a/morpheus/pipeline/preallocator_mixin.py b/morpheus/pipeline/preallocator_mixin.py index 58d9af4e33..6f75b4d44b 100644 --- a/morpheus/pipeline/preallocator_mixin.py +++ b/morpheus/pipeline/preallocator_mixin.py @@ -86,7 +86,7 @@ def _preallocate_multi(self, msg: MultiMessage) -> MultiMessage: return msg def _post_build_single(self, builder: mrc.Builder, out_node: mrc.SegmentObject) -> mrc.SegmentObject: - out_type = self.output_ports[0].out_type + out_type = self.output_ports[0].output_type pretty_type = pretty_print_type_name(out_type) if len(self._needed_columns) > 0: diff --git a/morpheus/pipeline/stage_schema.py b/morpheus/pipeline/stage_schema.py index f1c261cd81..66233ab234 100644 --- a/morpheus/pipeline/stage_schema.py +++ b/morpheus/pipeline/stage_schema.py @@ -46,8 +46,10 @@ class StageSchema: def __init__(self, stage: "StreamWrapper"): self._input_schemas = [] for port in stage.input_ports: - assert port.input_schema.is_complete(), "Attempted to create StageSchema with incomplete input port schemas" - self._input_schemas.append(port.input_schema) + input_schema = port.get_input_schema() + assert input_schema.is_complete(), \ + f"Attempted to create StageSchema for {stage} with incomplete input port schemas" + self._input_schemas.append(input_schema) self._output_schemas = [PortSchema() for _ in range(len(stage.output_ports))] diff --git a/morpheus/pipeline/stream_wrapper.py b/morpheus/pipeline/stream_wrapper.py index 42dabd51ba..2ba04249ce 100644 --- a/morpheus/pipeline/stream_wrapper.py +++ b/morpheus/pipeline/stream_wrapper.py @@ -481,7 +481,7 @@ def compute_schema(self, schema: _pipeline.StageSchema): all output ports. 
For example a simple pass-thru stage might perform the following: ``` - >>> for (port_idx, port_schema) in schema.input_schemas: + >>> for (port_idx, port_schema) in enumerate(schema.input_schemas): >>> schema.output_schemas[port_idx].set_type(port_schema.get_type()) ``` From 120636b4a12e401d13400af7d2ff1919a00f5eea Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 2 Oct 2023 14:11:43 -0700 Subject: [PATCH 15/65] Fix type-os for viz --- morpheus/pipeline/pipeline.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/morpheus/pipeline/pipeline.py b/morpheus/pipeline/pipeline.py index 23ed3cc8eb..35d93c9181 100644 --- a/morpheus/pipeline/pipeline.py +++ b/morpheus/pipeline/pipeline.py @@ -537,17 +537,17 @@ def has_ports(node: StreamWrapper, is_input): if (len(in_port._input_senders) == 1 and len(out_port._output_receivers) == 1 and (in_port.input_schema == out_port.output_schema)): - edge_attrs["label"] = pretty_print_type_name(in_port.input_schema) + edge_attrs["label"] = pretty_print_type_name(in_port.input_type) else: rec_idx = out_port._output_receivers.index(in_port) sen_idx = in_port._input_senders.index(out_port) # Add type labels if available if (rec_idx == 0 and out_port.output_schema is not None): - edge_attrs["taillabel"] = pretty_print_type_name(out_port.output_schema) + edge_attrs["taillabel"] = pretty_print_type_name(out_port.output_type) if (sen_idx == 0 and in_port.input_schema is not None): - edge_attrs["headlabel"] = pretty_print_type_name(in_port.input_schema) + edge_attrs["headlabel"] = pretty_print_type_name(in_port.input_type) gv_subgraph.edge(start_name, end_name, **edge_attrs) From 3ff29d37e6f6f3cef333fd14b4fe7e7875f8220a Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 2 Oct 2023 14:12:02 -0700 Subject: [PATCH 16/65] Temporarily skip failing test DO NOT MERGE --- tests/test_nonlinear_pipeline.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_nonlinear_pipeline.py b/tests/test_nonlinear_pipeline.py index 96665957ac..30812ce2e9 100755 --- a/tests/test_nonlinear_pipeline.py +++ b/tests/test_nonlinear_pipeline.py @@ -53,6 +53,7 @@ def test_forking_pipeline(config: Config, dataset_cudf: DatasetManager): assert_results(comp_lower.get_results()) +@pytest.mark.skip(reason="TODO: Fix this test, do not merge") @pytest.mark.parametrize("source_count, expected_count", [(1, 1), (2, 2), (3, 3)]) def test_port_multi_sender(config: Config, dataset_cudf: DatasetManager, source_count: int, expected_count: int): From 8ae4167c59dd64d590e5006fa16fefcb8002f413 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 2 Oct 2023 14:16:49 -0700 Subject: [PATCH 17/65] Revert "Temporarily skip failing test DO NOT MERGE" This reverts commit 3ff29d37e6f6f3cef333fd14b4fe7e7875f8220a. 
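With the `enumerate` fix to `PassThruTypeMixin` above, a stage can opt into pass-through typing purely by inheritance, as the `LinearBoundaryEgressStage` hunk earlier in the series does. A hypothetical minimal example:

    from morpheus.pipeline.pass_thru_type_mixin import PassThruTypeMixin
    from morpheus.pipeline.single_port_stage import SinglePortStage


    class ExamplePassThruStage(PassThruTypeMixin, SinglePortStage):
        # No compute_schema() override is needed: the mixin copies each
        # input port's resolved type onto the matching output port.
        ...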
--- tests/test_nonlinear_pipeline.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_nonlinear_pipeline.py b/tests/test_nonlinear_pipeline.py index 30812ce2e9..96665957ac 100755 --- a/tests/test_nonlinear_pipeline.py +++ b/tests/test_nonlinear_pipeline.py @@ -53,7 +53,6 @@ def test_forking_pipeline(config: Config, dataset_cudf: DatasetManager): assert_results(comp_lower.get_results()) -@pytest.mark.skip(reason="TODO: Fix this test, do not merge") @pytest.mark.parametrize("source_count, expected_count", [(1, 1), (2, 2), (3, 3)]) def test_port_multi_sender(config: Config, dataset_cudf: DatasetManager, source_count: int, expected_count: int): From 65466443a4fd44dca3cbea9ec45d9464b90e82b4 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 2 Oct 2023 14:37:38 -0700 Subject: [PATCH 18/65] Complete port schemas produced on the fly by the receiver --- morpheus/pipeline/receiver.py | 1 + 1 file changed, 1 insertion(+) diff --git a/morpheus/pipeline/receiver.py b/morpheus/pipeline/receiver.py index 8e18574933..9d7fe7b8a6 100644 --- a/morpheus/pipeline/receiver.py +++ b/morpheus/pipeline/receiver.py @@ -112,6 +112,7 @@ def _compute_input_schema(self): "Use a merge stage to handle different types of inputs.")) self._input_schema = _pipeline.PortSchema(port_type=great_ancestor) + self._input_schema.complete() self._is_schema_linked = True def get_input_schema(self) -> _pipeline.PortSchema: From ff48df32b0b5f3133c523cf3631d5072062464fe Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 2 Oct 2023 15:15:19 -0700 Subject: [PATCH 19/65] Lint fixes [no ci] --- morpheus/pipeline/receiver.py | 1 - morpheus/pipeline/single_output_source.py | 2 +- morpheus/pipeline/stage.py | 1 - morpheus/pipeline/stage_schema.py | 1 - 4 files changed, 1 insertion(+), 4 deletions(-) diff --git a/morpheus/pipeline/receiver.py b/morpheus/pipeline/receiver.py index 9d7fe7b8a6..1000b459f5 100644 --- a/morpheus/pipeline/receiver.py +++ b/morpheus/pipeline/receiver.py @@ -16,7 +16,6 @@ import typing import mrc -import typing_utils import morpheus.pipeline as _pipeline from morpheus.utils.type_utils import greatest_ancestor diff --git a/morpheus/pipeline/single_output_source.py b/morpheus/pipeline/single_output_source.py index 25a445f201..45784f6bf8 100644 --- a/morpheus/pipeline/single_output_source.py +++ b/morpheus/pipeline/single_output_source.py @@ -64,7 +64,7 @@ def _build_source(self, builder: mrc.Builder) -> mrc.SegmentObject: def _build_sources(self, builder: mrc.Builder) -> list[mrc.SegmentObject]: assert len(self.output_ports) == 1, \ f"SingleOutputSource should have only one output port, {self} has {len(self.output_ports)}" - + return [self._build_source(builder)] @typing.final diff --git a/morpheus/pipeline/stage.py b/morpheus/pipeline/stage.py index b84b55ccbb..8afeac3a37 100644 --- a/morpheus/pipeline/stage.py +++ b/morpheus/pipeline/stage.py @@ -14,7 +14,6 @@ import logging import warnings -from abc import abstractmethod import mrc diff --git a/morpheus/pipeline/stage_schema.py b/morpheus/pipeline/stage_schema.py index 66233ab234..bf2ceb277b 100644 --- a/morpheus/pipeline/stage_schema.py +++ b/morpheus/pipeline/stage_schema.py @@ -100,7 +100,6 @@ def output_schema(self) -> PortSchema: assert len(self._output_schemas) == 1, \ "Attempted to access output_schema property on StageSchema with multiple outputs" return self._output_schemas[0] - def complete(self): """ From 61b3cb12224111538d914cbe2175c24c9d01148f Mon Sep 17 00:00:00 2001 From: Devin Robison Date: Wed, 4 Oct 2023 09:53:55 -0600 Subject: 
[PATCH 20/65] Add pinned libwebp to resolve CVE (#1236) Authors: - Devin Robison (https://github.com/drobison00) - David Gardner (https://github.com/dagardner-nv) Approvers: - David Gardner (https://github.com/dagardner-nv) - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1236 --- ci/scripts/github/common.sh | 1 + docker/conda/environments/cuda11.8_dev.yml | 3 ++- docker/conda/environments/cuda11.8_examples.yml | 3 ++- models/mlflow/docker/conda/mlflow-env.yml | 1 + 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/ci/scripts/github/common.sh b/ci/scripts/github/common.sh index c712ae9e75..68a99e90a1 100644 --- a/ci/scripts/github/common.sh +++ b/ci/scripts/github/common.sh @@ -98,6 +98,7 @@ function update_conda_env() { rapids-logger "Checking for updates to conda env" # Update the packages + rm -rf /opt/conda/.condarc /opt/conda/envs/morpheus/lib/python3.10/site-packages/fastjsonschema-2.18.0.dist-info rapids-mamba-retry env update -n morpheus --prune -q --file ${ENV_YAML} # Finally, reactivate diff --git a/docker/conda/environments/cuda11.8_dev.yml b/docker/conda/environments/cuda11.8_dev.yml index d1e7ad8c6c..5ab3c774d8 100644 --- a/docker/conda/environments/cuda11.8_dev.yml +++ b/docker/conda/environments/cuda11.8_dev.yml @@ -59,7 +59,8 @@ dependencies: - isort - libgrpc>=1.49 - librdkafka=1.9.2 - - mlflow>=2.2.1,<3 + - libwebp>=1.3.2 # Required for CVE mitigation: https://nvd.nist.gov/vuln/detail/CVE-2023-4863 + - mlflow>=2.2.1,<2.7 - mrc=23.07 - networkx=3.1 - ninja=1.10 diff --git a/docker/conda/environments/cuda11.8_examples.yml b/docker/conda/environments/cuda11.8_examples.yml index a878484df7..eef25e5d2e 100644 --- a/docker/conda/environments/cuda11.8_examples.yml +++ b/docker/conda/environments/cuda11.8_examples.yml @@ -30,7 +30,8 @@ dependencies: - dask>=2023.1.1 - dill=0.3.6 - distributed>=2023.1.1 - - mlflow>=2.2.1,<3 + - libwebp>=1.3.2 # Required for CVE mitigation: https://nvd.nist.gov/vuln/detail/CVE-2023-4863 + - mlflow>=2.2.1,<2.7 - papermill=2.3.4 - s3fs>=2023.6 - pip diff --git a/models/mlflow/docker/conda/mlflow-env.yml b/models/mlflow/docker/conda/mlflow-env.yml index f0022cba8a..9c2acc0b38 100644 --- a/models/mlflow/docker/conda/mlflow-env.yml +++ b/models/mlflow/docker/conda/mlflow-env.yml @@ -20,6 +20,7 @@ channels: dependencies: - boto3 - onnx + - libwebp>=1.3.2 # Required for CVE mitigation: https://nvd.nist.gov/vuln/detail/CVE-2023-4863 - psycopg2<3 - pymysql - python=3.11 From 7aaec71be795be5f474ea3f0666455eca309cdea Mon Sep 17 00:00:00 2001 From: Devin Robison Date: Wed, 4 Oct 2023 18:42:45 -0600 Subject: [PATCH 21/65] Add libwebp to meta.yaml for CVE 2307 (#1242) Update meta.yaml for CVE 2307 mitigation Authors: - Devin Robison (https://github.com/drobison00) - David Gardner (https://github.com/dagardner-nv) Approvers: - David Gardner (https://github.com/dagardner-nv) - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1242 --- ci/conda/recipes/morpheus/meta.yaml | 3 +- tests/test_kafka_source_stage_pipe.py | 110 +++++++++++--------------- 2 files changed, 47 insertions(+), 66 deletions(-) diff --git a/ci/conda/recipes/morpheus/meta.yaml b/ci/conda/recipes/morpheus/meta.yaml index 9f117ed5ad..04d30a6105 100644 --- a/ci/conda/recipes/morpheus/meta.yaml +++ b/ci/conda/recipes/morpheus/meta.yaml @@ -79,7 +79,8 @@ outputs: - docker-py 5.0.* - grpcio # Version determined from cudf - libmrc - - mlflow >=2.2.1,<3 + - libwebp>=1.3.2 # Required for CVE 
mitigation: https://nvd.nist.gov/vuln/detail/CVE-2023-4863 + - mlflow>=2.2.1,<2.7 - mrc - networkx 3.1.* - numpydoc 1.4.* diff --git a/tests/test_kafka_source_stage_pipe.py b/tests/test_kafka_source_stage_pipe.py index 27b40440e5..9b423c30a1 100644 --- a/tests/test_kafka_source_stage_pipe.py +++ b/tests/test_kafka_source_stage_pipe.py @@ -15,16 +15,14 @@ # limitations under the License. import os +import time import typing -import mrc import pandas as pd import pytest -from mrc.core import operators as ops from morpheus.config import Config from morpheus.pipeline.linear_pipeline import LinearPipeline -from morpheus.pipeline.single_port_stage import SinglePortStage from morpheus.stages.general.trigger_stage import TriggerStage from morpheus.stages.input.kafka_source_stage import KafkaSourceStage from morpheus.stages.output.compare_dataframe_stage import CompareDataFrameStage @@ -36,6 +34,9 @@ from utils import write_file_to_kafka from utils.stages.dfp_length_checker import DFPLengthChecker +if (typing.TYPE_CHECKING): + from kafka import KafkaConsumer + @pytest.mark.kafka def test_kafka_source_stage_pipe(config, kafka_bootstrap_servers: str, kafka_topics: typing.Tuple[str, str]) -> None: @@ -93,77 +94,47 @@ def test_multi_topic_kafka_source_stage_pipe(config, kafka_bootstrap_servers: st assert_results(comp_stage.get_results()) -class OffsetChecker(SinglePortStage): +def seek_to_beginning(kafka_consumer: "KafkaConsumer", timeout: int = 15): """ - Verifies that the kafka offsets are being updated as a way of verifying that the - consumer is performing a commit. + Seeks to the beginning of the Kafka topic """ + start = time.time() + end = start + timeout + partitions_assigned = False + while not partitions_assigned and time.time() <= end: + kafka_consumer.poll(timeout_ms=20) + partitions_assigned = len(kafka_consumer.assignment()) > 0 + if not partitions_assigned: + time.sleep(0.1) - def __init__(self, c: Config, bootstrap_servers: str, group_id: str): - super().__init__(c) - - # Importing here so that running without the --run_kafka flag won't fail due - # to not having the kafka libs installed - from kafka import KafkaAdminClient - - self._client = KafkaAdminClient(bootstrap_servers=bootstrap_servers) - self._group_id = group_id - self._offsets = None - - @property - def name(self) -> str: - return "morpheus_offset_checker" - - def accepted_types(self) -> typing.Tuple: - """ - Accepted input types for this stage are returned. - - Returns - ------- - typing.Tuple - Accepted input types. 
- - """ - return (typing.Any, ) - - def supports_cpp_node(self): - return False - - def _offset_checker(self, x): - at_least_one_gt = False - new_offsets = self._client.list_consumer_group_offsets(self._group_id) - - if self._offsets is not None: - for (tp, prev_offset) in self._offsets.items(): - new_offset = new_offsets[tp] - - assert new_offset.offset >= prev_offset.offset + assert partitions_assigned - if new_offset.offset > prev_offset.offset: - at_least_one_gt = True - - assert at_least_one_gt - - self._offsets = new_offsets - - return x - - def _build_single(self, builder: mrc.Builder, input_stream): - node = builder.make_node(self.unique_name, ops.map(self._offset_checker)) - builder.make_edge(input_stream[0], node) - - return node, input_stream[1] + kafka_consumer.seek_to_beginning() @pytest.mark.kafka +@pytest.mark.parametrize('async_commits', [True, False]) @pytest.mark.parametrize('num_records', [10, 100, 1000]) -def test_kafka_source_commit(num_records, config, kafka_bootstrap_servers: str, - kafka_topics: typing.Tuple[str, str]) -> None: +def test_kafka_source_commit(num_records: int, + async_commits: bool, + config: Config, + kafka_bootstrap_servers: str, + kafka_topics: typing.Tuple[str, str], + kafka_consumer: "KafkaConsumer") -> None: + group_id = 'morpheus' data = [{'v': i} for i in range(num_records)] num_written = write_data_to_kafka(kafka_bootstrap_servers, kafka_topics.input_topic, data) assert num_written == num_records + kafka_consumer.subscribe([kafka_topics.input_topic]) + seek_to_beginning(kafka_consumer) + partitions = kafka_consumer.assignment() + + # This method does not advance the consumer, and even if it did, this consumer has a different group_id than the + # source stage + expected_offsets = kafka_consumer.end_offsets(partitions) + pipe = LinearPipeline(config) pipe.set_source( KafkaSourceStage(config, @@ -171,12 +142,10 @@ def test_kafka_source_commit(num_records, config, kafka_bootstrap_servers: str, input_topic=kafka_topics.input_topic, auto_offset_reset="earliest", poll_interval="1seconds", - group_id='morpheus', + group_id=group_id, client_id='morpheus_kafka_source_commit', stop_after=num_records, - async_commits=False)) - - pipe.add_stage(OffsetChecker(config, bootstrap_servers=kafka_bootstrap_servers, group_id='morpheus')) + async_commits=async_commits)) pipe.add_stage(TriggerStage(config)) pipe.add_stage(DeserializeStage(config)) @@ -187,6 +156,17 @@ def test_kafka_source_commit(num_records, config, kafka_bootstrap_servers: str, assert_results(comp_stage.get_results()) + from kafka import KafkaAdminClient + admin_client = KafkaAdminClient(bootstrap_servers=kafka_bootstrap_servers, client_id='offset_checker') + offsets = admin_client.list_consumer_group_offsets(group_id) + + # The broker may have created additional partitions, offsets should be a superset of expected_offsets + for (topic_partition, expected_offset) in expected_offsets.items(): + # The value of the offsets dict being returned is a tuple of (offset, metadata), while the value of the + # expected_offsets is just the offset. 
+ actual_offset = offsets[topic_partition][0] + assert actual_offset == expected_offset + @pytest.mark.kafka @pytest.mark.parametrize('num_records', [1000]) From 36ccb8ef00c56aca63aa69080194bb4133a31916 Mon Sep 17 00:00:00 2001 From: pthalasta <68306050+pthalasta@users.noreply.github.com> Date: Wed, 4 Oct 2023 17:44:33 -0700 Subject: [PATCH 22/65] Adds support to read and write to Databricks delta tables (#630) Adds support for querying data from Databricks delta tables and write the results to delta tables. Resolves #611 Authors: - https://github.com/pthalasta Approvers: - Christopher Harris (https://github.com/cwharris) - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/630 --- docker/Dockerfile | 1 + docker/conda/environments/cuda11.8_dev.yml | 1 + docs/source/conf.py | 1 + .../databricks_deltalake_source_stage.py | 102 +++++++++++++ .../write_to_databricks_deltalake_stage.py | 143 ++++++++++++++++++ .../test_databricks_deltalake_source_stage.py | 53 +++++++ ...est_write_to_databricks_deltalake_stage.py | 56 +++++++ 7 files changed, 357 insertions(+) create mode 100644 morpheus/stages/input/databricks_deltalake_source_stage.py create mode 100644 morpheus/stages/output/write_to_databricks_deltalake_stage.py create mode 100644 tests/test_databricks_deltalake_source_stage.py create mode 100644 tests/test_write_to_databricks_deltalake_stage.py diff --git a/docker/Dockerfile b/docker/Dockerfile index 3f82e5b17b..4c89accc20 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -85,6 +85,7 @@ RUN --mount=type=cache,id=apt,target=/var/cache/apt \ libcurand-${CUDA_MAJOR_VER}-${CUDA_MINOR_VER} \ libcusolver-${CUDA_MAJOR_VER}-${CUDA_MINOR_VER} \ libnuma1 \ + openjdk-11-jre-headless \ openssh-client \ pkg-config \ tar \ diff --git a/docker/conda/environments/cuda11.8_dev.yml b/docker/conda/environments/cuda11.8_dev.yml index 832038fc8f..f480f6c205 100644 --- a/docker/conda/environments/cuda11.8_dev.yml +++ b/docker/conda/environments/cuda11.8_dev.yml @@ -107,4 +107,5 @@ dependencies: ####### Morpheus Pip Dependencies (keep sorted!) ####### - pip: # Add additional dev dependencies here + - databricks-connect - pytest-kafka==0.6.0 diff --git a/docs/source/conf.py b/docs/source/conf.py index 940f9c44b0..adb924692f 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -164,6 +164,7 @@ autodoc_mock_imports = [ "cudf", # Avoid loading GPU libraries during the documentation build "cupy", # Avoid loading GPU libraries during the documentation build + "databricks.connect", "merlin", "morpheus.cli.commands", # Dont document the CLI in Sphinx "nvtabular", diff --git a/morpheus/stages/input/databricks_deltalake_source_stage.py b/morpheus/stages/input/databricks_deltalake_source_stage.py new file mode 100644 index 0000000000..2fb87da5d6 --- /dev/null +++ b/morpheus/stages/input/databricks_deltalake_source_stage.py @@ -0,0 +1,102 @@ +# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
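As a hedged usage sketch for the source defined below: the query, host, token and cluster id values are placeholders, not values taken from this commit.

    from morpheus.config import Config
    from morpheus.pipeline.linear_pipeline import LinearPipeline
    from morpheus.stages.input.databricks_deltalake_source_stage import \
        DataBricksDeltaLakeSourceStage

    config = Config()
    pipeline = LinearPipeline(config)
    pipeline.set_source(
        DataBricksDeltaLakeSourceStage(config,
                                       spark_query="SELECT * FROM my_table",
                                       items_per_page=1000,
                                       databricks_host="https://<workspace>.cloud.databricks.com",
                                       databricks_token="<access-token>",
                                       databricks_cluster_id="<cluster-id>"))
    pipeline.run()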
+ +import logging + +import mrc +from databricks.connect import DatabricksSession +from pyspark.sql import functions as sf +from pyspark.sql.window import Window + +import cudf + +from morpheus.cli.register_stage import register_stage +from morpheus.config import Config +from morpheus.messages.message_meta import MessageMeta +from morpheus.pipeline.preallocator_mixin import PreallocatorMixin +from morpheus.pipeline.single_output_source import SingleOutputSource +from morpheus.pipeline.stream_pair import StreamPair + +logger = logging.getLogger(__name__) + + +@register_stage("from-databricks-deltalake") +class DataBricksDeltaLakeSourceStage(PreallocatorMixin, SingleOutputSource): + """ + Source stage used to load messages from a DeltaLake table. + + Parameters + ---------- + config : morpheus.config.Config + Pipeline configuration instance. + spark_query : str, default None + SQL Query that need to be executed to fetch the results from deltalake table. + items_per_page: int, default 1000 + Number of rows per iteration/page to be fetched from remote spark cluster. + databricks_host : str, default None + URL of Databricks host to connect to. + databricks_token : str, default None + Access token for Databricks cluster. + databricks_cluster_id : str, default None + Databricks cluster to be used to query the data as per SQL provided. + """ + + def __init__(self, + config: Config, + spark_query: str = None, + items_per_page: int = 1000, + databricks_host: str = None, + databricks_token: str = None, + databricks_cluster_id: str = None): + + super().__init__(config) + self.spark_query = spark_query + self.spark = DatabricksSession.builder.remote(host=databricks_host, + token=databricks_token, + cluster_id=databricks_cluster_id).getOrCreate() + self.items_per_page = items_per_page + self.offset = 0 + + @property + def name(self) -> str: + return "from-databricks-deltalake" + + def supports_cpp_node(self) -> bool: + return False + + def _build_source(self, builder: mrc.Builder) -> StreamPair: + node = builder.make_source(self.unique_name, self.source_generator) + return node, MessageMeta + + def source_generator(self): + try: + spark_df = self.spark.sql(self.spark_query) + spark_df = spark_df.withColumn('_id', sf.monotonically_increasing_id()) + window = Window.partitionBy(sf.lit(1)).orderBy("_id") + spark_df = spark_df.select("*").withColumn("_id", sf.row_number().over(window)) + count = spark_df.count() + while self.offset <= count: + df = spark_df.where(sf.col('_id').between(self.offset, self.offset + self.items_per_page)) + logger.debug("Reading next iteration data between index: \ + %s and %s", + str(self.offset), + str(self.offset + self.items_per_page + 1)) + self.offset += self.items_per_page + 1 + yield MessageMeta(df=cudf.from_pandas(df.toPandas().drop(["_id"], axis=1))) + except Exception as e: + logger.error( + "Error occurred while reading data from \ + DeltaLake and converting to Dataframe: %s", + e) + raise diff --git a/morpheus/stages/output/write_to_databricks_deltalake_stage.py b/morpheus/stages/output/write_to_databricks_deltalake_stage.py new file mode 100644 index 0000000000..fc9d7228b8 --- /dev/null +++ b/morpheus/stages/output/write_to_databricks_deltalake_stage.py @@ -0,0 +1,143 @@ +# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import typing + +import mrc +import pandas as pd +from databricks.connect import DatabricksSession +from mrc.core import operators as ops +from pyspark.sql.types import BooleanType +from pyspark.sql.types import DoubleType +from pyspark.sql.types import FloatType +from pyspark.sql.types import IntegerType +from pyspark.sql.types import LongType +from pyspark.sql.types import StringType +from pyspark.sql.types import StructField +from pyspark.sql.types import StructType +from pyspark.sql.types import TimestampType + +import cudf + +from morpheus.cli.register_stage import register_stage +from morpheus.config import Config +from morpheus.messages import MessageMeta +from morpheus.pipeline.single_port_stage import SinglePortStage +from morpheus.pipeline.stream_pair import StreamPair + +logger = logging.getLogger(__name__) + + +@register_stage("to-databricks-deltalake") +class DataBricksDeltaLakeSinkStage(SinglePortStage): + """ + Sink stage used to write messages to a DeltaLake table. + + Parameters + ---------- + config : morpheus.config.Config + Pipeline configuration instance. + delta_path : str, default None + Path of the delta table where the data need to be written or updated. + databricks_host : str, default None + URL of Databricks host to connect to. + databricks_token : str, default None + Access token for Databricks cluster. + databricks_cluster_id : str, default None + Databricks cluster to be used to query the data as per SQL provided. + delta_table_write_mode: str, default "append" + Delta table write mode for storing data. + """ + + def __init__(self, + config: Config, + delta_path: str = None, + databricks_host: str = None, + databricks_token: str = None, + databricks_cluster_id: str = None, + delta_table_write_mode: str = "append"): + + super().__init__(config) + self.delta_path = delta_path + self.delta_table_write_mode = delta_table_write_mode + self.spark = DatabricksSession.builder.remote(host=databricks_host, + token=databricks_token, + cluster_id=databricks_cluster_id).getOrCreate() + + @property + def name(self) -> str: + return "to-databricks-deltalake" + + def accepted_types(self) -> typing.Tuple: + """ + Returns accepted input types for this stage. 
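+
+        Returns
+        -------
+        typing.Tuple
+            Accepted input types.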
+ """ + return (MessageMeta, ) + + def supports_cpp_node(self) -> bool: + return False + + def _build_single(self, builder: mrc.Builder, input_stream: StreamPair) -> StreamPair: + stream = input_stream[0] + + def write_to_deltalake(meta: MessageMeta): + """ + convert cudf to spark dataframe + """ + df = meta.copy_dataframe() + if isinstance(df, cudf.DataFrame): + df = df.to_pandas() + schema = self._extract_schema_from_pandas_dataframe(df) + spark_df = self.spark.createDataFrame(df, schema=schema) + spark_df.write \ + .format('delta') \ + .option("mergeSchema", "true") \ + .mode(self.delta_table_write_mode) \ + .save(self.delta_path) + return meta + + node = builder.make_node(self.unique_name, ops.map(write_to_deltalake)) + builder.make_edge(stream, node) + + # Return input unchanged to allow passthrough + return node, input_stream[1] + + @staticmethod + def _extract_schema_from_pandas_dataframe(df: pd.DataFrame) -> StructType: + """ + Extract approximate schemas from pandas dataframe + """ + spark_schema = [] + for col, dtype in df.dtypes.items(): + try: + if dtype == "bool": + spark_dtype = StructField(col, BooleanType()) + elif dtype == "int64": + spark_dtype = StructField(col, LongType()) + elif dtype == "int32": + spark_dtype = StructField(col, IntegerType()) + elif dtype == "float64": + spark_dtype = StructField(col, DoubleType()) + elif dtype == "float32": + spark_dtype = StructField(col, FloatType()) + elif dtype == "datetime64[ns]": + spark_dtype = StructField(col, TimestampType()) + else: + spark_dtype = StructField(col, StringType()) + except Exception as e: + logger.error("Encountered error %s while converting columns %s with data type %s", e, col, dtype) + spark_dtype = StructField(col, StringType()) + spark_schema.append(spark_dtype) + return StructType(spark_schema) diff --git a/tests/test_databricks_deltalake_source_stage.py b/tests/test_databricks_deltalake_source_stage.py new file mode 100644 index 0000000000..6309e3c27d --- /dev/null +++ b/tests/test_databricks_deltalake_source_stage.py @@ -0,0 +1,53 @@ +# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest.mock import patch + +import pytest + +import cudf + +from _utils import assert_results +from _utils.dataset_manager import DatasetManager +from morpheus.config import Config +from morpheus.pipeline import LinearPipeline +from morpheus.stages.input.databricks_deltalake_source_stage import DataBricksDeltaLakeSourceStage +from morpheus.stages.output.compare_dataframe_stage import CompareDataFrameStage + + +@pytest.mark.use_pandas +def test_databricks_deltalake_source_stage_pipe(config: Config, dataset: DatasetManager): + """ + Test the DataBricksDeltaLakeSourceStage against a mock spark session which + will return spark_df converted into a DataFrame with specific rows per page. 
+ """ + + expected_df = dataset['filter_probs.csv'] + # pylint: disable=unused-variable + with patch('morpheus.stages.input.databricks_deltalake_source_stage.DatabricksSession') as mock_db_session: # NOQA + databricks_deltalake_source_stage = DataBricksDeltaLakeSourceStage(config, + spark_query="", + items_per_page=10000, + databricks_host="", + databricks_token="", + databricks_cluster_id="") + databricks_deltalake_source_stage.spark.sql.return_value.withColumn.return_value.select.return_value.\ + withColumn.return_value.where.return_value.toPandas.return_value.drop.return_value = expected_df + databricks_deltalake_source_stage.spark.sql.return_value.withColumn.return_value.select.return_value. \ + withColumn.return_value.count.return_value = expected_df.shape[0] + pipe = LinearPipeline(config) + pipe.set_source(databricks_deltalake_source_stage) + comp_stage = pipe.add_stage(CompareDataFrameStage(config, cudf.from_pandas(expected_df))) + pipe.run() + assert_results(comp_stage.get_results()) diff --git a/tests/test_write_to_databricks_deltalake_stage.py b/tests/test_write_to_databricks_deltalake_stage.py new file mode 100644 index 0000000000..bb1daedc7e --- /dev/null +++ b/tests/test_write_to_databricks_deltalake_stage.py @@ -0,0 +1,56 @@ +# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from unittest import mock +from unittest.mock import patch + +import pytest + +from _utils.dataset_manager import DatasetManager +from morpheus.config import Config +from morpheus.pipeline.linear_pipeline import LinearPipeline +from morpheus.stages.input.in_memory_source_stage import InMemorySourceStage +from morpheus.stages.output.write_to_databricks_deltalake_stage import DataBricksDeltaLakeSinkStage +from morpheus.stages.postprocess.serialize_stage import SerializeStage +from morpheus.stages.preprocess.deserialize_stage import DeserializeStage + + +@pytest.mark.use_cudf +def test_databricks_deltalake_sink_stage_pipe(config: Config, dataset: DatasetManager): + """ + Test the DataBricksDeltaLakeSinkStage against a mock spark session which + will create spark dataframe that will be written to remote + location from databricks cluster. 
+ """ + + df_input_a = dataset['filter_probs.csv'] + # pylint: disable=unused-variable + with patch('morpheus.stages.output.write_to_databricks_deltalake_stage.DatabricksSession') \ + as mock_db_session: # NOQA + databricks_deltalake_sink_stage = DataBricksDeltaLakeSinkStage(config, + delta_path="", + delta_table_write_mode="append", + databricks_host="", + databricks_token="", + databricks_cluster_id="") + mock_spark_df = mock.Mock() + databricks_deltalake_sink_stage.spark.createDataFrame.return_value = mock_spark_df + pipeline = LinearPipeline(config) + pipeline.set_source(InMemorySourceStage(config, [df_input_a])) + pipeline.add_stage(DeserializeStage(config)) + pipeline.add_stage(SerializeStage(config)) + pipeline.add_stage(databricks_deltalake_sink_stage) + pipeline.run() + databricks_deltalake_sink_stage.spark.createDataFrame.assert_called_once() + mock_spark_df.write.format.assert_called_once() From 1b6e9f2b30bf2578a6ca977d2e95305cdb3e38e7 Mon Sep 17 00:00:00 2001 From: Bhargav Suryadevara Date: Wed, 4 Oct 2023 22:28:23 -0500 Subject: [PATCH 23/65] [BUG] Fix Control Message Utils & SQL Max Connections Exhaust (#1243) - Updated SQL loader to utilize connections from the pool. - Fixed control message utility variable referenced before assignment error. closes #1237 #1235 Authors: - Bhargav Suryadevara (https://github.com/bsuryadevara) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1243 --- morpheus/loaders/sql_loader.py | 46 ++++++++++++++----------- morpheus/utils/control_message_utils.py | 7 ++-- 2 files changed, 29 insertions(+), 24 deletions(-) diff --git a/morpheus/loaders/sql_loader.py b/morpheus/loaders/sql_loader.py index e3619f78c3..098d0df53c 100644 --- a/morpheus/loaders/sql_loader.py +++ b/morpheus/loaders/sql_loader.py @@ -18,6 +18,7 @@ import pandas as pd from sqlalchemy import create_engine +from sqlalchemy import engine import cudf @@ -40,7 +41,7 @@ def _parse_query_data( Parameters ---------- query_data : Dict[str, Union[str, Optional[Dict[str, Any]]]] - The dictionary containing the connection string, query, and params (optional). + The dictionary containing the query, and params (optional). Returns ------- @@ -48,22 +49,19 @@ def _parse_query_data( A dictionary containing parsed connection string, query, and params (if present). """ - return { - "connection_string": query_data["connection_string"], - "query": query_data["query"], - "params": query_data.get("params", None) - } + return {"query": query_data["query"], "params": query_data.get("params", None)} -def _read_sql(connection_string: str, query: str, params: typing.Optional[typing.Dict[str, typing.Any]] = None) -> \ - typing.Dict[str, pd.DataFrame]: +def _read_sql(engine_obj: engine.Engine, + query: str, + params: typing.Optional[typing.Dict[str, typing.Any]] = None) -> typing.Dict[str, pd.DataFrame]: """ Creates a DataFrame from a SQL query. Parameters ---------- - connection_string : str - Connection string to the database. + engine_obj : engine.Engine + SQL engine instance. query : str SQL query. params : Optional[Dict[str, Any]], default=None @@ -75,14 +73,10 @@ def _read_sql(connection_string: str, query: str, params: typing.Optional[typing A dictionary containing a DataFrame of the SQL query result. 
""" - # TODO(Devin): PERFORMANCE OPTIMIZATION - # TODO(Devin): Add connection pooling -- Probably needs to go on the actual loader - engine = create_engine(connection_string) - if (params is None): - df = pd.read_sql(query, engine) + df = pd.read_sql(query, engine_obj) else: - df = pd.read_sql(query, engine, params=params) + df = pd.read_sql(query, engine_obj, params=params) return {"df": df} @@ -132,14 +126,24 @@ def sql_loader(control_message: ControlMessage, task: typing.Dict[str, typing.An with CMDefaultFailureContextManager(control_message): final_df = None + engine_registry = {} sql_config = task["sql_config"] queries = sql_config["queries"] - - for query_data in queries: - aggregate_df = functools.partial(_aggregate_df, df_aggregate=final_df) - execution_chain = ExecutionChain(function_chain=[_parse_query_data, _read_sql, aggregate_df]) - final_df = execution_chain(query_data=query_data) + try: + for query_data in queries: + conn_str = query_data.pop("connection_string") + if conn_str not in engine_registry: + engine_registry[conn_str] = create_engine(conn_str) + + aggregate_df = functools.partial(_aggregate_df, df_aggregate=final_df) + read_sql = functools.partial(_read_sql, engine_obj=engine_registry[conn_str]) + execution_chain = ExecutionChain(function_chain=[_parse_query_data, read_sql, aggregate_df]) + final_df = execution_chain(query_data=query_data) + finally: + # Dispose all open connections. + for engine_obj in engine_registry.values(): + engine_obj.dispose() control_message.payload(MessageMeta(final_df)) diff --git a/morpheus/utils/control_message_utils.py b/morpheus/utils/control_message_utils.py index b1bd40fb36..7d6d7a9254 100644 --- a/morpheus/utils/control_message_utils.py +++ b/morpheus/utils/control_message_utils.py @@ -83,10 +83,11 @@ def cm_default_failure_context_manager(raise_on_failure: bool = False) -> typing def decorator(func): @wraps(func) - def wrapper(control_messsage: ControlMessage, *args, **kwargs): - with CMDefaultFailureContextManager(control_message=control_messsage, + def wrapper(control_message: ControlMessage, *args, **kwargs): + ret_cm = control_message + with CMDefaultFailureContextManager(control_message=control_message, raise_on_failure=raise_on_failure) as ctx_mgr: - cm_ensure_payload_not_null(control_message=control_messsage) + cm_ensure_payload_not_null(control_message=control_message) ret_cm = func(ctx_mgr.control_message, *args, **kwargs) return ret_cm From 865b0e369cd7060e3fa1a9e99315a754a813f86c Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 5 Oct 2023 06:51:51 -0700 Subject: [PATCH 24/65] Remove conflicting condarc file from base image --- ci/scripts/github/common.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ci/scripts/github/common.sh b/ci/scripts/github/common.sh index c60f091336..dd73852e4d 100644 --- a/ci/scripts/github/common.sh +++ b/ci/scripts/github/common.sh @@ -93,6 +93,10 @@ function update_conda_env() { if [[ "${SKIP_CONDA_ENV_UPDATE}" == "" ]]; then rapids-logger "Checking for updates to conda env" + + # Remove default/conflicting channels from base image + rm /opt/conda/.condarc + # Update the packages rapids-mamba-retry env update -n morpheus --prune -q --file ${ENV_YAML} fi From 7682ae4430ec1adad89772e3df692ca8856bb649 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 5 Oct 2023 07:10:14 -0700 Subject: [PATCH 25/65] Fix linting errors --- tests/test_kafka_source_stage_pipe.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/test_kafka_source_stage_pipe.py 
b/tests/test_kafka_source_stage_pipe.py index eb27867e28..ea45cca35c 100644 --- a/tests/test_kafka_source_stage_pipe.py +++ b/tests/test_kafka_source_stage_pipe.py @@ -15,7 +15,6 @@ # limitations under the License. import os -import time import typing import pandas as pd @@ -38,9 +37,6 @@ if (typing.TYPE_CHECKING): from kafka import KafkaConsumer -if (typing.TYPE_CHECKING): - from kafka import KafkaConsumer - @pytest.mark.kafka def test_kafka_source_stage_pipe(config, kafka_bootstrap_servers: str, kafka_topics: typing.Tuple[str, str]) -> None: From 6fb88d1e612c713c192d273ccdba3fecd993d903 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 5 Oct 2023 09:31:45 -0700 Subject: [PATCH 26/65] Fix mlflow version constraint to match that of 23.11 --- ci/conda/recipes/morpheus/meta.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/conda/recipes/morpheus/meta.yaml b/ci/conda/recipes/morpheus/meta.yaml index 04d30a6105..5e94706072 100644 --- a/ci/conda/recipes/morpheus/meta.yaml +++ b/ci/conda/recipes/morpheus/meta.yaml @@ -80,7 +80,7 @@ outputs: - grpcio # Version determined from cudf - libmrc - libwebp>=1.3.2 # Required for CVE mitigation: https://nvd.nist.gov/vuln/detail/CVE-2023-4863 - - mlflow>=2.2.1,<2.7 + - mlflow>=2.2.1,<3 - mrc - networkx 3.1.* - numpydoc 1.4.* From f783abd63ad8a2539a5ea9cc7755f31a9e39bbd8 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 5 Oct 2023 09:48:22 -0700 Subject: [PATCH 27/65] Fix mlflow version --- docker/conda/environments/cuda11.8_examples.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/conda/environments/cuda11.8_examples.yml b/docker/conda/environments/cuda11.8_examples.yml index dfbe653690..a7b8b1b49b 100644 --- a/docker/conda/environments/cuda11.8_examples.yml +++ b/docker/conda/environments/cuda11.8_examples.yml @@ -32,6 +32,6 @@ dependencies: - dill=0.3.6 - distributed>=2023.1.1 - libwebp>=1.3.2 # Required for CVE mitigation: https://nvd.nist.gov/vuln/detail/CVE-2023-4863 - - mlflow>=2.2.1,<2.7 + - mlflow>=2.2.1,<3 - papermill=2.3.4 - s3fs>=2023.6 From d5a55ac21e7eabb4cef4c468e25c3b085461aaf4 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 5 Oct 2023 11:51:37 -0700 Subject: [PATCH 28/65] Rename StreamWrapper to BaseStage to reflect that we are no longer using cuStreamz [no ci] --- morpheus/pipeline/__init__.py | 2 +- morpheus/pipeline/linear_pipeline.py | 2 +- morpheus/pipeline/pipeline.py | 18 +++++++++--------- morpheus/pipeline/receiver.py | 2 +- morpheus/pipeline/sender.py | 4 ++-- morpheus/pipeline/source_stage.py | 2 +- morpheus/pipeline/stage.py | 2 +- morpheus/pipeline/stage_schema.py | 4 ++-- morpheus/pipeline/stream_wrapper.py | 10 +++++----- 9 files changed, 23 insertions(+), 23 deletions(-) diff --git a/morpheus/pipeline/__init__.py b/morpheus/pipeline/__init__.py index 44e5d2fc77..01257291e0 100644 --- a/morpheus/pipeline/__init__.py +++ b/morpheus/pipeline/__init__.py @@ -22,7 +22,7 @@ from morpheus.pipeline.stage_schema import StageSchema from morpheus.pipeline.sender import Sender from morpheus.pipeline.receiver import Receiver -from morpheus.pipeline.stream_wrapper import StreamWrapper +from morpheus.pipeline.stream_wrapper import BaseStage from morpheus.pipeline.stage import Stage from morpheus.pipeline.single_port_stage import SinglePortStage from morpheus.pipeline.multi_message_stage import MultiMessageStage diff --git a/morpheus/pipeline/linear_pipeline.py b/morpheus/pipeline/linear_pipeline.py index 33191c4db0..dc7217ae34 100644 --- 
a/morpheus/pipeline/linear_pipeline.py +++ b/morpheus/pipeline/linear_pipeline.py @@ -44,7 +44,7 @@ def __init__(self, c: Config): self._next_segment_index = 0 self._increment_segment_id() - self._linear_stages: typing.List[_pipeline.StreamWrapper] = [] + self._linear_stages: typing.List[_pipeline.BaseStage] = [] def _increment_segment_id(self): self._linear_stages = [] diff --git a/morpheus/pipeline/pipeline.py b/morpheus/pipeline/pipeline.py index 35d93c9181..94152f7aad 100644 --- a/morpheus/pipeline/pipeline.py +++ b/morpheus/pipeline/pipeline.py @@ -34,12 +34,12 @@ from morpheus.pipeline.sender import Sender from morpheus.pipeline.source_stage import SourceStage from morpheus.pipeline.stage import Stage -from morpheus.pipeline.stream_wrapper import StreamWrapper +from morpheus.pipeline.stream_wrapper import BaseStage from morpheus.utils.type_utils import pretty_print_type_name logger = logging.getLogger(__name__) -StageT = typing.TypeVar("StageT", bound=StreamWrapper) +StageT = typing.TypeVar("StageT", bound=BaseStage) class Pipeline(): @@ -131,7 +131,7 @@ def add_stage(self, stage: StageT, segment_id: str = "main") -> StageT: return stage def add_edge(self, - start: typing.Union[StreamWrapper, Sender], + start: typing.Union[BaseStage, Sender], end: typing.Union[Stage, Receiver], segment_id: str = "main"): """ @@ -150,7 +150,7 @@ def add_edge(self, """ self._assert_not_built() - if (isinstance(start, StreamWrapper)): + if (isinstance(start, BaseStage)): start_port = start.output_ports[0] elif (isinstance(start, Sender)): start_port = start @@ -255,7 +255,7 @@ def _pre_build(self): # Finally, execute the link phase (only necessary for circular pipelines) # for s in source_and_stages: for stage in segment_graph.nodes(): - for port in typing.cast(StreamWrapper, stage).input_ports: + for port in typing.cast(BaseStage, stage).input_ports: port.link_schema() logger.info("====Pre-Building Segment Complete!====") @@ -311,7 +311,7 @@ def inner_build(builder: mrc.Builder, segment_id: str): # Finally, execute the link phase (only necessary for circular pipelines) for stage in segment_graph.nodes(): - for port in typing.cast(StreamWrapper, stage).input_ports: + for port in typing.cast(BaseStage, stage).input_ports: port.link_node(builder=builder) asyncio.run(self._async_start(segment_graph.nodes())) @@ -454,7 +454,7 @@ def visualize(self, filename: str = None, **graph_kwargs): start_def_port = ":e" if is_lr else ":s" end_def_port = ":w" if is_lr else ":n" - def has_ports(node: StreamWrapper, is_input): + def has_ports(node: BaseStage, is_input): if (is_input): return len(node.input_ports) > 0 @@ -465,7 +465,7 @@ def has_ports(node: StreamWrapper, is_input): gv_subgraphs[segment_id] = graphviz.Digraph(f"cluster_{segment_id}") gv_subgraph = gv_subgraphs[segment_id] gv_subgraph.attr(label=segment_id) - for name, attrs in typing.cast(typing.Mapping[StreamWrapper, dict], + for name, attrs in typing.cast(typing.Mapping[BaseStage, dict], self._segment_graphs[segment_id].nodes).items(): node_attrs = attrs.copy() @@ -504,7 +504,7 @@ def has_ports(node: StreamWrapper, is_input): # Build up edges for segment_id in self._segments: gv_subgraph = gv_subgraphs[segment_id] - for e, attrs in typing.cast(typing.Mapping[typing.Tuple[StreamWrapper, StreamWrapper], dict], + for e, attrs in typing.cast(typing.Mapping[typing.Tuple[BaseStage, BaseStage], dict], self._segment_graphs[segment_id].edges()).items(): # noqa: E501 edge_attrs = {} diff --git a/morpheus/pipeline/receiver.py b/morpheus/pipeline/receiver.py index 
1000b459f5..730e8e38af 100644 --- a/morpheus/pipeline/receiver.py +++ b/morpheus/pipeline/receiver.py @@ -35,7 +35,7 @@ class Receiver(): Receiver port number. """ - def __init__(self, parent: "_pipeline.StreamWrapper", port_number: int): + def __init__(self, parent: "_pipeline.BaseStage", port_number: int): self._parent = parent self.port_number = port_number diff --git a/morpheus/pipeline/sender.py b/morpheus/pipeline/sender.py index ec74251b7f..0412cad8ef 100644 --- a/morpheus/pipeline/sender.py +++ b/morpheus/pipeline/sender.py @@ -34,7 +34,7 @@ class Sender(): Sender port number. """ - def __init__(self, parent: "_pipeline.StreamWrapper", port_number: int): + def __init__(self, parent: "_pipeline.BaseStage", port_number: int): self._parent = parent self.port_number = port_number @@ -45,7 +45,7 @@ def __init__(self, parent: "_pipeline.StreamWrapper", port_number: int): self._output_node: mrc.SegmentObject = None @property - def parent(self) -> "_pipeline.StreamWrapper": + def parent(self) -> "_pipeline.BaseStage": return self._parent @property diff --git a/morpheus/pipeline/source_stage.py b/morpheus/pipeline/source_stage.py index abd2b874f1..8ca491356a 100644 --- a/morpheus/pipeline/source_stage.py +++ b/morpheus/pipeline/source_stage.py @@ -24,7 +24,7 @@ logger = logging.getLogger(__name__) -class SourceStage(_pipeline.StreamWrapper): +class SourceStage(_pipeline.BaseStage): """ The SourceStage is mandatory for the Morpheus pipeline to run. This stage represents the start of the pipeline. All `SourceStage` object take no input but generate output. diff --git a/morpheus/pipeline/stage.py b/morpheus/pipeline/stage.py index 8afeac3a37..2ea202db22 100644 --- a/morpheus/pipeline/stage.py +++ b/morpheus/pipeline/stage.py @@ -22,7 +22,7 @@ logger = logging.getLogger(__name__) -class Stage(_pipeline.StreamWrapper): +class Stage(_pipeline.BaseStage): """ This class serves as the base for all pipeline stage implementations that are not source objects. diff --git a/morpheus/pipeline/stage_schema.py b/morpheus/pipeline/stage_schema.py index bf2ceb277b..c3653e15fa 100644 --- a/morpheus/pipeline/stage_schema.py +++ b/morpheus/pipeline/stage_schema.py @@ -16,7 +16,7 @@ import typing if typing.TYPE_CHECKING: - from .stream_wrapper import StreamWrapper + from .stream_wrapper import BaseStage class PortSchema: @@ -43,7 +43,7 @@ def is_complete(self) -> bool: class StageSchema: - def __init__(self, stage: "StreamWrapper"): + def __init__(self, stage: "BaseStage"): self._input_schemas = [] for port in stage.input_ports: input_schema = port.get_input_schema() diff --git a/morpheus/pipeline/stream_wrapper.py b/morpheus/pipeline/stream_wrapper.py index 2ba04249ce..2a7efbbe97 100644 --- a/morpheus/pipeline/stream_wrapper.py +++ b/morpheus/pipeline/stream_wrapper.py @@ -38,7 +38,7 @@ def _save_init_vals(func: _DecoratorType) -> _DecoratorType: sig = inspect.signature(func, follow_wrapped=True) @functools.wraps(func) - def inner(self: "StreamWrapper", *args, **kwargs): + def inner(self: "BaseStage", *args, **kwargs): # Actually call init first. This way any super classes strings will be overridden func(self, *args, **kwargs) @@ -63,7 +63,7 @@ def inner(self: "StreamWrapper", *args, **kwargs): return typing.cast(_DecoratorType, inner) -class StreamWrapper(ABC, collections.abc.Hashable): +class BaseStage(ABC, collections.abc.Hashable): """ This abstract class serves as the morpheus pipeline's base class. This class wraps a `mrc.SegmentObject` object and aids in hooking stages up together. 
@@ -81,7 +81,7 @@ def __init__(self, config: Config): # Save the config self._config = config - self._id = StreamWrapper.__ID_COUNTER.get_and_inc() + self._id = BaseStage.__ID_COUNTER.get_and_inc() self._pipeline: _pipeline.Pipeline = None self._init_str: str = "" # Stores the initialization parameters used for creation. Needed for __repr__ @@ -228,7 +228,7 @@ def get_all_inputs(self) -> list[_pipeline.Sender]: return senders - def get_all_input_stages(self) -> list["StreamWrapper"]: + def get_all_input_stages(self) -> list["BaseStage"]: """ Get all input stages to this stage. @@ -255,7 +255,7 @@ def get_all_outputs(self) -> list[_pipeline.Receiver]: return receivers - def get_all_output_stages(self) -> list["StreamWrapper"]: + def get_all_output_stages(self) -> list["BaseStage"]: """ Get all output stages from this stage. From 5d06a45b494111331aaf0e31c6918d090e2113d1 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 5 Oct 2023 12:01:13 -0700 Subject: [PATCH 29/65] WIP: [no ci] --- morpheus/pipeline/linear_pipeline.py | 4 ++-- morpheus/pipeline/pipeline.py | 4 ++-- morpheus/pipeline/receiver.py | 6 +++--- morpheus/pipeline/sender.py | 6 +++--- morpheus/pipeline/stream_wrapper.py | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/morpheus/pipeline/linear_pipeline.py b/morpheus/pipeline/linear_pipeline.py index dc7217ae34..67a4e15c0c 100644 --- a/morpheus/pipeline/linear_pipeline.py +++ b/morpheus/pipeline/linear_pipeline.py @@ -149,8 +149,8 @@ def add_segment_boundary(self, data_type=None, as_shared_pointer=False): boundary_port_id=self._current_segment_id, data_type=data_type) - # TODO: update to use data_type once typeid is attached to registered objects out of band: - # https://github.com/nv-morpheus/MRC/issues/176 + # TODO: update to use data_type once typeid is attached to registered objects out of band # pylint:disable=fixme + # https://github.com/nv-morpheus/MRC/issues/176 port_id_tuple = (self._current_segment_id, object, False) if data_type else self._current_segment_id self.add_stage(boundary_egress) diff --git a/morpheus/pipeline/pipeline.py b/morpheus/pipeline/pipeline.py index 94152f7aad..ffcfc1b526 100644 --- a/morpheus/pipeline/pipeline.py +++ b/morpheus/pipeline/pipeline.py @@ -139,7 +139,7 @@ def add_edge(self, Parameters ---------- - start : typing.Union[StreamWrapper, Sender] + start : typing.Union[BaseStage, Sender] The start of the edge or parent stage. end : typing.Union[Stage, Receiver] @@ -265,7 +265,7 @@ def _pre_build(self): def build(self): """ This function sequentially activates all the Morpheus pipeline stages passed by the users to execute a - pipeline. For the `Source` and all added `Stage` objects, `StreamWrapper.build` will be called sequentially to + pipeline. For the `Source` and all added `Stage` objects, `BaseStage.build` will be called sequentially to construct the pipeline. Once the pipeline has been constructed, this will start the pipeline by calling `Source.start` on the source diff --git a/morpheus/pipeline/receiver.py b/morpheus/pipeline/receiver.py index 730e8e38af..602cb7c09f 100644 --- a/morpheus/pipeline/receiver.py +++ b/morpheus/pipeline/receiver.py @@ -25,12 +25,12 @@ class Receiver(): """ - The `Receiver` object represents a downstream port on a `StreamWrapper` object that gets messages from a `Sender`. + The `Receiver` object represents a downstream port on a `BaseStage` object that gets messages from a `Sender`. 
Parameters ---------- - parent : `morpheus.pipeline.pipeline.StreamWrapper` - Parent `StreamWrapper` object. + parent : `morpheus.pipeline.pipeline.BaseStage` + Parent `BaseStage` object. port_number : int Receiver port number. """ diff --git a/morpheus/pipeline/sender.py b/morpheus/pipeline/sender.py index 0412cad8ef..d9696fa81e 100644 --- a/morpheus/pipeline/sender.py +++ b/morpheus/pipeline/sender.py @@ -24,12 +24,12 @@ class Sender(): """ - The `Sender` object represents a port on a `StreamWrapper` object that sends messages to a `Receiver`. + The `Sender` object represents a port on a `BaseStage` object that sends messages to a `Receiver`. Parameters ---------- - parent : `morpheus.pipeline.pipeline.StreamWrapper` - Parent `StreamWrapper` object. + parent : `morpheus.pipeline.pipeline.BaseStage` + Parent `BaseStage` object. port_number : int Sender port number. """ diff --git a/morpheus/pipeline/stream_wrapper.py b/morpheus/pipeline/stream_wrapper.py index 2a7efbbe97..485e392cf4 100644 --- a/morpheus/pipeline/stream_wrapper.py +++ b/morpheus/pipeline/stream_wrapper.py @@ -234,7 +234,7 @@ def get_all_input_stages(self) -> list["BaseStage"]: Returns ------- - list[`morpheus.pipeline.pipeline.StreamWrapper`] + list[`morpheus.pipeline.pipeline.BaseStage`] All input stages. """ return [x.parent for x in self.get_all_inputs()] @@ -261,7 +261,7 @@ def get_all_output_stages(self) -> list["BaseStage"]: Returns ------- - list[`morpheus.pipeline.pipeline.StreamWrapper`] + list[`morpheus.pipeline.pipeline.BaseStage`] All output stages. """ return [x.parent for x in self.get_all_outputs()] From 64cd7f869a7185b72ea4742d623db5e5ef10b463 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 5 Oct 2023 12:02:05 -0700 Subject: [PATCH 30/65] WIP: [no ci] --- morpheus/pipeline/__init__.py | 2 +- morpheus/pipeline/{stream_wrapper.py => base_stage.py} | 0 morpheus/pipeline/pipeline.py | 2 +- morpheus/pipeline/stage_schema.py | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename morpheus/pipeline/{stream_wrapper.py => base_stage.py} (100%) diff --git a/morpheus/pipeline/__init__.py b/morpheus/pipeline/__init__.py index 01257291e0..6984c702b5 100644 --- a/morpheus/pipeline/__init__.py +++ b/morpheus/pipeline/__init__.py @@ -22,7 +22,7 @@ from morpheus.pipeline.stage_schema import StageSchema from morpheus.pipeline.sender import Sender from morpheus.pipeline.receiver import Receiver -from morpheus.pipeline.stream_wrapper import BaseStage +from morpheus.pipeline.base_stage import BaseStage from morpheus.pipeline.stage import Stage from morpheus.pipeline.single_port_stage import SinglePortStage from morpheus.pipeline.multi_message_stage import MultiMessageStage diff --git a/morpheus/pipeline/stream_wrapper.py b/morpheus/pipeline/base_stage.py similarity index 100% rename from morpheus/pipeline/stream_wrapper.py rename to morpheus/pipeline/base_stage.py diff --git a/morpheus/pipeline/pipeline.py b/morpheus/pipeline/pipeline.py index ffcfc1b526..5acd1a0245 100644 --- a/morpheus/pipeline/pipeline.py +++ b/morpheus/pipeline/pipeline.py @@ -34,7 +34,7 @@ from morpheus.pipeline.sender import Sender from morpheus.pipeline.source_stage import SourceStage from morpheus.pipeline.stage import Stage -from morpheus.pipeline.stream_wrapper import BaseStage +from morpheus.pipeline.base_stage import BaseStage from morpheus.utils.type_utils import pretty_print_type_name logger = logging.getLogger(__name__) diff --git a/morpheus/pipeline/stage_schema.py b/morpheus/pipeline/stage_schema.py index c3653e15fa..696510c683 
100644 --- a/morpheus/pipeline/stage_schema.py
+++ b/morpheus/pipeline/stage_schema.py
@@ -16,7 +16,7 @@
 import typing

 if typing.TYPE_CHECKING:
- from .stream_wrapper import BaseStage
+ from .base_stage import BaseStage


 class PortSchema:

From 98b11402de490d15d10433cbbb64e0ac3dd150dc Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Thu, 5 Oct 2023 12:24:58 -0700
Subject: [PATCH 31/65] Indicate that the as_shared_pointer parameter is
 unused [no ci]

---
 morpheus/pipeline/linear_pipeline.py | 6 ++++--
 tests/test_multi_segment.py | 11 +++++++++++
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/morpheus/pipeline/linear_pipeline.py b/morpheus/pipeline/linear_pipeline.py
index 67a4e15c0c..fc4cdefadd 100644
--- a/morpheus/pipeline/linear_pipeline.py
+++ b/morpheus/pipeline/linear_pipeline.py
@@ -83,7 +83,7 @@ def set_source(self, source: SourceT) -> SourceT:

 return source

- def add_stage(self, stage: SinglePortStageT) -> SinglePortStageT:
+ def add_stage(self, stage: SinglePortStageT) -> SinglePortStageT: # pylint:disable=arguments-differ
 """
 Add a stage to the pipeline. All `Stage` classes added with this method will be executed sequentially in the
 order they were added.
@@ -118,7 +118,7 @@ def add_segment_boundary(self, data_type=None, as_shared_pointer=False):
 if 'data_type' has no registered edge adapters.
 as_shared_pointer : `boolean`
- Whether the data type will be wrapped in a shared pointer.
+ Whether the data type will be wrapped in a shared pointer. Currently this is not implemented.

 Examples
 --------
@@ -138,6 +138,8 @@ def add_segment_boundary(self, data_type=None, as_shared_pointer=False):
 >>>
 >>> pipe.run()
 """
+ assert as_shared_pointer is False, "Shared pointers are not currently supported"
+
 if (len(self._linear_stages) == 0):
 raise RuntimeError("Cannot create a segment boundary, current segment is empty.")

diff --git a/tests/test_multi_segment.py b/tests/test_multi_segment.py
index b5d5e7d2a4..bca7417b4c 100644
--- a/tests/test_multi_segment.py
+++ b/tests/test_multi_segment.py
@@ -47,3 +47,14 @@ def test_multi_segment_bad_data_type(config, filter_probs_df):
 pipe.run()

 assert len(mem_sink.get_messages()) == 0
+
+
+def test_add_segment_boundary_as_shared_pointer_error(config, filter_probs_df):
+ """
+ Test for the assertion error raised when `as_shared_pointer=True` is passed to `add_segment_boundary`.
+ Remove this test when the `as_shared_pointer` functionality is implemented.
+ """ + with pytest.raises(AssertionError): + pipe = LinearPipeline(config) + pipe.set_source(InMemorySourceStage(config, [filter_probs_df])) + pipe.add_segment_boundary(MessageMeta, as_shared_pointer=True) From 763b88eb80ff0b5d5616d8a29831126e5d8752bd Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 5 Oct 2023 13:09:53 -0700 Subject: [PATCH 32/65] Update deltalake stages to support stage schemas [no ci] --- .../input/databricks_deltalake_source_stage.py | 10 ++++++---- .../output/write_to_databricks_deltalake_stage.py | 13 ++++++------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/morpheus/stages/input/databricks_deltalake_source_stage.py b/morpheus/stages/input/databricks_deltalake_source_stage.py index 2fb87da5d6..ff267c9c8d 100644 --- a/morpheus/stages/input/databricks_deltalake_source_stage.py +++ b/morpheus/stages/input/databricks_deltalake_source_stage.py @@ -26,7 +26,7 @@ from morpheus.messages.message_meta import MessageMeta from morpheus.pipeline.preallocator_mixin import PreallocatorMixin from morpheus.pipeline.single_output_source import SingleOutputSource -from morpheus.pipeline.stream_pair import StreamPair +from morpheus.pipeline.stage_schema import StageSchema logger = logging.getLogger(__name__) @@ -75,9 +75,11 @@ def name(self) -> str: def supports_cpp_node(self) -> bool: return False - def _build_source(self, builder: mrc.Builder) -> StreamPair: - node = builder.make_source(self.unique_name, self.source_generator) - return node, MessageMeta + def compute_schema(self, schema: StageSchema): + schema.output_schema.set_type(MessageMeta) + + def _build_source(self, builder: mrc.Builder) -> mrc.SegmentObject: + return builder.make_source(self.unique_name, self.source_generator) def source_generator(self): try: diff --git a/morpheus/stages/output/write_to_databricks_deltalake_stage.py b/morpheus/stages/output/write_to_databricks_deltalake_stage.py index fc9d7228b8..1b710ca056 100644 --- a/morpheus/stages/output/write_to_databricks_deltalake_stage.py +++ b/morpheus/stages/output/write_to_databricks_deltalake_stage.py @@ -34,14 +34,15 @@ from morpheus.cli.register_stage import register_stage from morpheus.config import Config from morpheus.messages import MessageMeta +from morpheus.pipeline.pass_thru_type_mixin import PassThruTypeMixin from morpheus.pipeline.single_port_stage import SinglePortStage -from morpheus.pipeline.stream_pair import StreamPair +from morpheus.pipeline.stage_schema import StageSchema logger = logging.getLogger(__name__) @register_stage("to-databricks-deltalake") -class DataBricksDeltaLakeSinkStage(SinglePortStage): +class DataBricksDeltaLakeSinkStage(PassThruTypeMixin, SinglePortStage): """ Sink stage used to write messages to a DeltaLake table. 
@@ -89,8 +90,7 @@ def accepted_types(self) -> typing.Tuple: def supports_cpp_node(self) -> bool: return False - def _build_single(self, builder: mrc.Builder, input_stream: StreamPair) -> StreamPair: - stream = input_stream[0] + def _build_single(self, builder: mrc.Builder, input_node: mrc.SegmentObject) -> mrc.SegmentObject: def write_to_deltalake(meta: MessageMeta): """ @@ -109,10 +109,9 @@ def write_to_deltalake(meta: MessageMeta): return meta node = builder.make_node(self.unique_name, ops.map(write_to_deltalake)) - builder.make_edge(stream, node) + builder.make_edge(input_node, node) - # Return input unchanged to allow passthrough - return node, input_stream[1] + return node @staticmethod def _extract_schema_from_pandas_dataframe(df: pd.DataFrame) -> StructType: From d57ec2075bd2489e77ec6b161e80ff9cea57a35a Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 5 Oct 2023 13:12:49 -0700 Subject: [PATCH 33/65] Remove unused import [no ci] --- morpheus/stages/output/write_to_databricks_deltalake_stage.py | 1 - 1 file changed, 1 deletion(-) diff --git a/morpheus/stages/output/write_to_databricks_deltalake_stage.py b/morpheus/stages/output/write_to_databricks_deltalake_stage.py index 1b710ca056..05f7c4de77 100644 --- a/morpheus/stages/output/write_to_databricks_deltalake_stage.py +++ b/morpheus/stages/output/write_to_databricks_deltalake_stage.py @@ -36,7 +36,6 @@ from morpheus.messages import MessageMeta from morpheus.pipeline.pass_thru_type_mixin import PassThruTypeMixin from morpheus.pipeline.single_port_stage import SinglePortStage -from morpheus.pipeline.stage_schema import StageSchema logger = logging.getLogger(__name__) From 3c0863f97af38917f4a1dd54a3029e44fff6e70f Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 5 Oct 2023 13:22:58 -0700 Subject: [PATCH 34/65] Allow test to run in both Python & C++ mode [no ci] --- tests/test_databricks_deltalake_source_stage.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/test_databricks_deltalake_source_stage.py b/tests/test_databricks_deltalake_source_stage.py index 6309e3c27d..bbe364f896 100644 --- a/tests/test_databricks_deltalake_source_stage.py +++ b/tests/test_databricks_deltalake_source_stage.py @@ -26,14 +26,14 @@ from morpheus.stages.output.compare_dataframe_stage import CompareDataFrameStage -@pytest.mark.use_pandas -def test_databricks_deltalake_source_stage_pipe(config: Config, dataset: DatasetManager): +def test_databricks_deltalake_source_stage_pipe(config: Config, dataset_cudf: DatasetManager): """ Test the DataBricksDeltaLakeSourceStage against a mock spark session which will return spark_df converted into a DataFrame with specific rows per page. 
""" - expected_df = dataset['filter_probs.csv'] + expected_df = dataset_cudf['filter_probs.csv'] + df = expected_df.to_pandas() # pylint: disable=unused-variable with patch('morpheus.stages.input.databricks_deltalake_source_stage.DatabricksSession') as mock_db_session: # NOQA databricks_deltalake_source_stage = DataBricksDeltaLakeSourceStage(config, @@ -43,11 +43,11 @@ def test_databricks_deltalake_source_stage_pipe(config: Config, dataset: Dataset databricks_token="", databricks_cluster_id="") databricks_deltalake_source_stage.spark.sql.return_value.withColumn.return_value.select.return_value.\ - withColumn.return_value.where.return_value.toPandas.return_value.drop.return_value = expected_df + withColumn.return_value.where.return_value.toPandas.return_value.drop.return_value = df databricks_deltalake_source_stage.spark.sql.return_value.withColumn.return_value.select.return_value. \ - withColumn.return_value.count.return_value = expected_df.shape[0] + withColumn.return_value.count.return_value = df.shape[0] pipe = LinearPipeline(config) pipe.set_source(databricks_deltalake_source_stage) - comp_stage = pipe.add_stage(CompareDataFrameStage(config, cudf.from_pandas(expected_df))) + comp_stage = pipe.add_stage(CompareDataFrameStage(config, expected_df)) pipe.run() assert_results(comp_stage.get_results()) From f2803db9b3b483f7609deddf4d1db386082ea765 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 5 Oct 2023 14:21:36 -0700 Subject: [PATCH 35/65] Mark the complete method as private [no ci] --- morpheus/pipeline/base_stage.py | 2 +- morpheus/pipeline/receiver.py | 2 +- morpheus/pipeline/stage_schema.py | 14 ++++++++------ 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/morpheus/pipeline/base_stage.py b/morpheus/pipeline/base_stage.py index 485e392cf4..4f9a08bdaa 100644 --- a/morpheus/pipeline/base_stage.py +++ b/morpheus/pipeline/base_stage.py @@ -347,7 +347,7 @@ def _pre_build(self, do_propagate: bool = True): (f"Prebuild expected `schema.output_schemas` to be of length {len(self.output_ports)} " f"(one for each output port), but got {len(schema.output_schemas)}.") - schema.complete() + schema._complete() for (port_idx, port_schema) in enumerate(schema.output_schemas): self.output_ports[port_idx].output_schema = port_schema diff --git a/morpheus/pipeline/receiver.py b/morpheus/pipeline/receiver.py index 602cb7c09f..f1532e2ca9 100644 --- a/morpheus/pipeline/receiver.py +++ b/morpheus/pipeline/receiver.py @@ -111,7 +111,7 @@ def _compute_input_schema(self): "Use a merge stage to handle different types of inputs.")) self._input_schema = _pipeline.PortSchema(port_type=great_ancestor) - self._input_schema.complete() + self._input_schema._complete() self._is_schema_linked = True def get_input_schema(self) -> _pipeline.PortSchema: diff --git a/morpheus/pipeline/stage_schema.py b/morpheus/pipeline/stage_schema.py index 696510c683..021dc26b44 100644 --- a/morpheus/pipeline/stage_schema.py +++ b/morpheus/pipeline/stage_schema.py @@ -32,8 +32,8 @@ def set_type(self, value: type): assert not self._completed, "Attempted to set type on completed PortSchema" self._type = value - def complete(self): - assert not self._completed, "Attempted to PortSchema.complete() twice" + def _complete(self): + assert not self._completed, "Attempted to PortSchema._complete() twice" assert self._type is not None, "Attempted to complete PortSchema without setting type" self._completed = True @@ -101,11 +101,13 @@ def output_schema(self) -> PortSchema: "Attempted to access output_schema property on 
StageSchema with multiple outputs" return self._output_schemas[0] - def complete(self): + def _complete(self): """ - Calls complete on all output port schemas. - This will trigger an assertion error if any of the output port schemas do not have a type set. + Calls `_complete` on all output port schemas. + This will trigger an assertion error if any of the output port schemas do not have a type set, or have + previously been completed. Users should not call this function directly, as this is called internally by the + `BaseStage` and `Receiver` classes. """ for port_schema in self.output_schemas: # This locks the port schema - port_schema.complete() + port_schema._complete() From c521cd41c7d04ddb9bc2b2a60eacb070ca411e6f Mon Sep 17 00:00:00 2001 From: Devin Robison Date: Thu, 5 Oct 2023 16:53:29 -0600 Subject: [PATCH 36/65] [DRAFT] Add model and experiment template 'click' options to dfp example pipelines, and make model names Databricks compatible. (#1245) Closes issue #1244 Authors: - Devin Robison (https://github.com/drobison00) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1245 --- .../morpheus/dfp/utils/dfp_arg_parser.py | 6 ++-- .../production/morpheus/dfp_azure_pipeline.py | 14 +++++++-- .../production/morpheus/dfp_duo_pipeline.py | 14 +++++++-- .../dfp_integrated_training_batch_pipeline.py | 12 ++++++++ ..._integrated_training_streaming_pipeline.py | 12 ++++++++ .../mlflow_model_writer_controller.py | 29 +++++++++++++++++-- 6 files changed, 78 insertions(+), 9 deletions(-) diff --git a/examples/digital_fingerprinting/production/morpheus/dfp/utils/dfp_arg_parser.py b/examples/digital_fingerprinting/production/morpheus/dfp/utils/dfp_arg_parser.py index 4b807443ad..2c5dc1bc4d 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp/utils/dfp_arg_parser.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp/utils/dfp_arg_parser.py @@ -46,6 +46,8 @@ def __init__(self, source: str, tracking_uri: str, silence_monitors: bool, + mlflow_experiment_name_formatter: str, + mlflow_model_name_formatter: str, train_users: str = None): self._skip_users = list(skip_user) @@ -65,8 +67,8 @@ def __init__(self, self._time_fields: TimeFields = None self._silence_monitors = silence_monitors - self._model_name_formatter = f"DFP-{source}-" + "{user_id}" - self._experiment_name_formatter = f"dfp/{source}/training/" + "{reg_model_name}" + self._model_name_formatter = mlflow_model_name_formatter + self._experiment_name_formatter = mlflow_experiment_name_formatter @staticmethod def verify_init(func): diff --git a/examples/digital_fingerprinting/production/morpheus/dfp_azure_pipeline.py b/examples/digital_fingerprinting/production/morpheus/dfp_azure_pipeline.py index 41cccd83e9..fd83d4d327 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp_azure_pipeline.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp_azure_pipeline.py @@ -140,6 +140,14 @@ type=str, default="http://mlflow:5000", help=("The MLflow tracking URI to connect to the tracking backend.")) +@click.option('--mlflow_experiment_name_template', + type=str, + default="dfp/azure/training/{reg_model_name}", + help="The MLflow experiment name template to use when logging experiments. ") +@click.option('--mlflow_model_name_template', + type=str, + default="DFP-azure-{user_id}", + help="The MLflow model name template to use when logging models. 
") def run_pipeline(train_users, skip_user: typing.Tuple[str], only_user: typing.Tuple[str], @@ -149,6 +157,8 @@ def run_pipeline(train_users, log_level, sample_rate_s, filter_threshold, + mlflow_experiment_name_template, + mlflow_model_name_template, **kwargs): """Runs the DFP pipeline.""" # To include the generic, we must be training all or generic @@ -311,8 +321,8 @@ def run_pipeline(train_users, # Output is UserMessageMeta -- Cached frame set pipeline.add_stage(DFPPreprocessingStage(config, input_schema=preprocess_schema)) - model_name_formatter = "DFP-azure-{user_id}" - experiment_name_formatter = "dfp/azure/training/{reg_model_name}" + model_name_formatter = mlflow_model_name_template + experiment_name_formatter = mlflow_experiment_name_template if (is_training): # Finally, perform training which will output a model diff --git a/examples/digital_fingerprinting/production/morpheus/dfp_duo_pipeline.py b/examples/digital_fingerprinting/production/morpheus/dfp_duo_pipeline.py index dd47a8a923..a9d588fae1 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp_duo_pipeline.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp_duo_pipeline.py @@ -141,6 +141,14 @@ type=str, default="http://mlflow:5000", help=("The MLflow tracking URI to connect to the tracking backend.")) +@click.option('--mlflow_experiment_name_template', + type=str, + default="dfp/duo/training/{reg_model_name}", + help="The MLflow experiment name template to use when logging experiments. ") +@click.option('--mlflow_model_name_template', + type=str, + default="DFP-duo-{user_id}", + help="The MLflow model name template to use when logging models. ") def run_pipeline(train_users, skip_user: typing.Tuple[str], only_user: typing.Tuple[str], @@ -150,6 +158,8 @@ def run_pipeline(train_users, log_level, sample_rate_s, filter_threshold, + mlflow_experiment_name_template, + mlflow_model_name_template, **kwargs): """Runs the DFP pipeline.""" # To include the generic, we must be training all or generic @@ -306,8 +316,8 @@ def run_pipeline(train_users, # Output is UserMessageMeta -- Cached frame set pipeline.add_stage(DFPPreprocessingStage(config, input_schema=preprocess_schema)) - model_name_formatter = "DFP-duo-{user_id}" - experiment_name_formatter = "dfp/duo/training/{reg_model_name}" + model_name_formatter = mlflow_model_name_template + experiment_name_formatter = mlflow_experiment_name_template if (is_training): diff --git a/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_batch_pipeline.py b/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_batch_pipeline.py index 8384a0ebaf..c18da19ee4 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_batch_pipeline.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_batch_pipeline.py @@ -101,6 +101,14 @@ type=str, default="http://mlflow:5000", help=("The MLflow tracking URI to connect to the tracking backend.")) +@click.option('--mlflow_experiment_name_template', + type=str, + default="dfp/{source}/training/{reg_model_name}", + help="The MLflow experiment name template to use when logging experiments. ") +@click.option('--mlflow_model_name_template', + type=str, + default="DFP-{source}-{user_id}", + help="The MLflow model name template to use when logging models. ") @click.option("--disable_pre_filtering", is_flag=True, help=("Enabling this option will skip pre-filtering of json messages. 
" @@ -126,6 +134,8 @@ def run_pipeline(source: str, tracking_uri, silence_monitors, use_cpp, + mlflow_experiment_name_template, + mlflow_model_name_template, **kwargs): if (skip_user and only_user): logging.error("Option --skip_user and --only_user are mutually exclusive. Exiting") @@ -140,6 +150,8 @@ def run_pipeline(source: str, source, tracking_uri, silence_monitors, + mlflow_experiment_name_template, + mlflow_model_name_template, train_users) dfp_arg_parser.init() diff --git a/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_streaming_pipeline.py b/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_streaming_pipeline.py index 6374a61f5d..e60792d6d3 100644 --- a/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_streaming_pipeline.py +++ b/examples/digital_fingerprinting/production/morpheus/dfp_integrated_training_streaming_pipeline.py @@ -101,6 +101,14 @@ type=str, default="http://mlflow:5000", help=("The MLflow tracking URI to connect to the tracking backend.")) +@click.option('--mlflow_experiment_name_template', + type=str, + default="dfp/{source}/training/{reg_model_name}", + help="The MLflow experiment name template to use when logging experiments. ") +@click.option('--mlflow_model_name_template', + type=str, + default="DFP-{source}-{user_id}", + help="The MLflow model name template to use when logging models. ") @click.option('--bootstrap_servers', type=str, default="localhost:9092", @@ -138,6 +146,8 @@ def run_pipeline(source: str, tracking_uri, silence_monitors, use_cpp, + mlflow_experiment_name_template, + mlflow_model_name_template, **kwargs): if (skip_user and only_user): logging.error("Option --skip_user and --only_user are mutually exclusive. Exiting") @@ -152,6 +162,8 @@ def run_pipeline(source: str, source, tracking_uri, silence_monitors, + mlflow_experiment_name_template, + mlflow_model_name_template, train_users) dfp_arg_parser.init() diff --git a/morpheus/controllers/mlflow_model_writer_controller.py b/morpheus/controllers/mlflow_model_writer_controller.py index d373a01dd1..bf935e8751 100644 --- a/morpheus/controllers/mlflow_model_writer_controller.py +++ b/morpheus/controllers/mlflow_model_writer_controller.py @@ -86,6 +86,27 @@ def experiment_name_formatter(self): def databricks_permissions(self): return self._databricks_permissions + def _create_safe_user_id(self, user_id: str): + """ + Creates a safe user ID for use in MLflow model names and experiment names. + + Parameters + ---------- + user_id : str + The user ID. + + Returns + ------- + str + The generated safe user ID. + """ + + safe_user_id = user_id.replace('.', '_dot_') + safe_user_id = safe_user_id.replace('/', '_slash_') + safe_user_id = safe_user_id.replace(':', '_colon_') + + return safe_user_id + def user_id_to_model(self, user_id: str): """ Converts a user ID to an model name @@ -102,7 +123,7 @@ def user_id_to_model(self, user_id: str): """ kwargs = { - "user_id": user_id, + "user_id": self._create_safe_user_id(user_id), "user_md5": hashlib.md5(user_id.encode('utf-8')).hexdigest(), } @@ -123,9 +144,11 @@ def user_id_to_experiment(self, user_id: str) -> str: The generated experiment name. 
""" + safe_user_id = self._create_safe_user_id(user_id) + kwargs = { - "user_id": user_id, - "user_md5": hashlib.md5(user_id.encode('utf-8')).hexdigest(), + "user_id": safe_user_id, + "user_md5": hashlib.md5(safe_user_id.encode('utf-8')).hexdigest(), "reg_model_name": self.user_id_to_model(user_id=user_id) } From 7cf3c474125018ba255ecdee84c2e31c5d641156 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 5 Oct 2023 16:01:19 -0700 Subject: [PATCH 37/65] Tests for PortSchema [no ci] --- tests/pipeline/test_port_schema.py | 63 ++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 tests/pipeline/test_port_schema.py diff --git a/tests/pipeline/test_port_schema.py b/tests/pipeline/test_port_schema.py new file mode 100644 index 0000000000..6ebd5c7982 --- /dev/null +++ b/tests/pipeline/test_port_schema.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from morpheus.pipeline.stage_schema import PortSchema + + +@pytest.mark.parametrize("port_type", [float, None]) +def test_port_schema_init(port_type: type): + port_schema = PortSchema(port_type=port_type) + assert port_schema.get_type() is port_type + assert not port_schema.is_complete() + + +@pytest.mark.parametrize("port_type", [float, None]) +def test_port_schema_set_type(port_type: type): + port_schema = PortSchema(port_type=port_type) + + port_schema.set_type(int) + assert port_schema.get_type() is int + + +def test_port_schema_complete(): + port_schema = PortSchema(port_type=float) + assert not port_schema.is_complete() + + port_schema._complete() + assert port_schema.is_complete() + + +def test_port_schema_complete_error_no_type(): + port_schema = PortSchema() + + with pytest.raises(AssertionError): + port_schema._complete() + + assert not port_schema.is_complete() + + +def test_port_schema_complete_error_called_twice(): + port_schema = PortSchema(port_type=float) + + port_schema._complete() + + with pytest.raises(AssertionError): + port_schema._complete() + + # Should still be complete + assert port_schema.is_complete() \ No newline at end of file From f0bb070ff758c5dc18d754e8e04c1ee3ce9734af Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 6 Oct 2023 09:34:37 -0700 Subject: [PATCH 38/65] WIP tests: [no ci] --- tests/pipeline/test_port_schema.py | 12 ++--- tests/pipeline/test_stage_schema.py | 68 +++++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 6 deletions(-) create mode 100644 tests/pipeline/test_stage_schema.py diff --git a/tests/pipeline/test_port_schema.py b/tests/pipeline/test_port_schema.py index 6ebd5c7982..b1d42b2494 100644 --- a/tests/pipeline/test_port_schema.py +++ b/tests/pipeline/test_port_schema.py @@ -20,21 +20,21 @@ @pytest.mark.parametrize("port_type", [float, None]) -def test_port_schema_init(port_type: type): +def test_constructor(port_type: type): port_schema = 
PortSchema(port_type=port_type) assert port_schema.get_type() is port_type assert not port_schema.is_complete() @pytest.mark.parametrize("port_type", [float, None]) -def test_port_schema_set_type(port_type: type): +def test_set_type(port_type: type): port_schema = PortSchema(port_type=port_type) port_schema.set_type(int) assert port_schema.get_type() is int -def test_port_schema_complete(): +def test_complete(): port_schema = PortSchema(port_type=float) assert not port_schema.is_complete() @@ -42,7 +42,7 @@ def test_port_schema_complete(): assert port_schema.is_complete() -def test_port_schema_complete_error_no_type(): +def test_complete_error_no_type(): port_schema = PortSchema() with pytest.raises(AssertionError): @@ -51,7 +51,7 @@ def test_port_schema_complete_error_no_type(): assert not port_schema.is_complete() -def test_port_schema_complete_error_called_twice(): +def test_complete_error_called_twice(): port_schema = PortSchema(port_type=float) port_schema._complete() @@ -60,4 +60,4 @@ def test_port_schema_complete_error_called_twice(): port_schema._complete() # Should still be complete - assert port_schema.is_complete() \ No newline at end of file + assert port_schema.is_complete() diff --git a/tests/pipeline/test_stage_schema.py b/tests/pipeline/test_stage_schema.py new file mode 100644 index 0000000000..8ca6fd9354 --- /dev/null +++ b/tests/pipeline/test_stage_schema.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import pytest + +from morpheus.config import Config +from morpheus.pipeline import LinearPipeline +from morpheus.pipeline import Pipeline +from morpheus.pipeline.base_stage import BaseStage +from morpheus.pipeline.stage_schema import PortSchema +from morpheus.pipeline.stage_schema import StageSchema +from morpheus.stages.input.in_memory_source_stage import InMemorySourceStage +from morpheus.stages.preprocess.deserialize_stage import DeserializeStage +from _utils.dataset_manager import DatasetManager +from _utils.stages.split_stage import SplitStage + + +@pytest.fixture(name="source_stage") +def source_stage_fixture(config: Config, dataset_cudf: DatasetManager): + df = dataset_cudf['filter_probs.csv'] + yield InMemorySourceStage(config, [df]) + + +@pytest.fixture(name="stage") +def stage_fixture(config: Config, source_stage: InMemorySourceStage): + stage = DeserializeStage(config) + + pipe = LinearPipeline(config) + pipe.set_source(source_stage) + pipe.add_stage(stage) + pipe.build() + + yield stage + + +@pytest.fixture(name="multiport_stage") +def multiport_stage_fixture(config: Config, source_stage: InMemorySourceStage): + stage = SplitStage(config) + + pipe = Pipeline(config) + pipe.add_stage(source_stage) + pipe.add_stage(stage) + pipe.add_edge(source_stage, stage) + pipe.build() + + yield stage + + +@pytest.mark.parametrize("stage_fixture,num_inputs,num_outputs", [("source_stage", 0, 1), ("stage", 1, 1), + ("multiport_stage", 1, 2)]) +def test_constructor(request: pytest.FixtureRequest, stage_fixture: str, num_inputs: int, num_outputs: int): + stage = request.getfixturevalue(stage_fixture) + schema = StageSchema(stage) + assert len(schema.input_schemas) == num_inputs + assert len(schema.output_schemas) == num_outputs From ca72c4365aa5031ea83c786c6470c9c7799d6fb0 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 6 Oct 2023 10:42:07 -0700 Subject: [PATCH 39/65] WIP [no ci] --- tests/pipeline/test_stage_schema.py | 91 ++++++++++++++++++++++------- 1 file changed, 71 insertions(+), 20 deletions(-) diff --git a/tests/pipeline/test_stage_schema.py b/tests/pipeline/test_stage_schema.py index 8ca6fd9354..2a2f3c5d7d 100644 --- a/tests/pipeline/test_stage_schema.py +++ b/tests/pipeline/test_stage_schema.py @@ -15,11 +15,13 @@ # limitations under the License. 
import pytest +import typing_utils from morpheus.config import Config -from morpheus.pipeline import LinearPipeline +from morpheus.messages import MessageMeta from morpheus.pipeline import Pipeline from morpheus.pipeline.base_stage import BaseStage +from morpheus.pipeline.source_stage import SourceStage from morpheus.pipeline.stage_schema import PortSchema from morpheus.pipeline.stage_schema import StageSchema from morpheus.stages.input.in_memory_source_stage import InMemorySourceStage @@ -28,41 +30,90 @@ from _utils.stages.split_stage import SplitStage -@pytest.fixture(name="source_stage") -def source_stage_fixture(config: Config, dataset_cudf: DatasetManager): +@pytest.fixture(name="in_mem_source_stage") +def in_mem_source_stage_fixture(config: Config, dataset_cudf: DatasetManager): df = dataset_cudf['filter_probs.csv'] yield InMemorySourceStage(config, [df]) -@pytest.fixture(name="stage") -def stage_fixture(config: Config, source_stage: InMemorySourceStage): - stage = DeserializeStage(config) - - pipe = LinearPipeline(config) - pipe.set_source(source_stage) +def _build_ports(config: Config, source_stage: SourceStage, stage: BaseStage): + pipe = Pipeline(config) + pipe.add_stage(source_stage) pipe.add_stage(stage) + pipe.add_edge(source_stage, stage) pipe.build() + +@pytest.fixture(name="stage") +def stage_fixture(config: Config, in_mem_source_stage: InMemorySourceStage): + stage = DeserializeStage(config) + _build_ports(config=config, source_stage=in_mem_source_stage, stage=stage) + yield stage @pytest.fixture(name="multiport_stage") -def multiport_stage_fixture(config: Config, source_stage: InMemorySourceStage): +def multiport_stage_fixture(config: Config, in_mem_source_stage: InMemorySourceStage): stage = SplitStage(config) - - pipe = Pipeline(config) - pipe.add_stage(source_stage) - pipe.add_stage(stage) - pipe.add_edge(source_stage, stage) - pipe.build() + _build_ports(config=config, source_stage=in_mem_source_stage, stage=stage) yield stage -@pytest.mark.parametrize("stage_fixture,num_inputs,num_outputs", [("source_stage", 0, 1), ("stage", 1, 1), - ("multiport_stage", 1, 2)]) -def test_constructor(request: pytest.FixtureRequest, stage_fixture: str, num_inputs: int, num_outputs: int): - stage = request.getfixturevalue(stage_fixture) +@pytest.mark.parametrize("stage_fixture_name,num_inputs,num_outputs", [("in_mem_source_stage", 0, 1), ("stage", 1, 1), + ("multiport_stage", 1, 2)]) +def test_constructor(request: pytest.FixtureRequest, stage_fixture_name: str, num_inputs: int, num_outputs: int): + stage = request.getfixturevalue(stage_fixture_name) schema = StageSchema(stage) assert len(schema.input_schemas) == num_inputs assert len(schema.output_schemas) == num_outputs + + +def test_single_port_input_schemas(stage: DeserializeStage): + schema = StageSchema(stage) + assert len(schema.input_schemas) == 1 + + port_schema = schema.input_schemas[0] + assert port_schema.get_type() is MessageMeta + + assert schema.input_schema is port_schema + + +def test_single_port_input_types(stage: DeserializeStage): + schema = StageSchema(stage) + assert len(schema.input_types) == 1 + + assert schema.input_types[0] is MessageMeta + assert schema.input_type is MessageMeta + + +def test_single_port_output_schemas(in_mem_source_stage: InMemorySourceStage): + schema = StageSchema(in_mem_source_stage) + in_mem_source_stage.compute_schema(schema) + assert len(schema.output_schemas) == 1 + + port_schema = schema.output_schemas[0] + assert port_schema.get_type() is MessageMeta + + assert schema.output_schema is 
port_schema
+
+
+def test_multi_port_output_schemas(multiport_stage: SplitStage):
+ schema = StageSchema(multiport_stage)
+ multiport_stage.compute_schema(schema)
+ assert len(schema.output_schemas) == 2
+
+ for port_schema in schema.output_schemas:
+ assert port_schema.get_type() is MessageMeta
+
+
+def test_output_schema_multi_error(multiport_stage: SplitStage):
+ """
+ Test confirms that the output_schema property raises an error when there are multiple output schemas
+ """
+ schema = StageSchema(multiport_stage)
+ multiport_stage.compute_schema(schema)
+ assert len(schema.output_schemas) == 2
+
+ with pytest.raises(AssertionError):
+ schema.output_schema

From bae6e4eae46830e82eba5bdb443e15c2343ae992 Mon Sep 17 00:00:00 2001
From: David Gardner
Date: Fri, 6 Oct 2023 10:46:47 -0700
Subject: [PATCH 40/65] First pass at some multi-port stages for testing [no
 ci]

---
 .../stages/in_memory_multi_source_stage.py | 59 +++++++++++++++++++
 tests/_utils/stages/multi_port_pass_thru.py | 55 +++++++++++++++++
 2 files changed, 114 insertions(+)
 create mode 100644 tests/_utils/stages/in_memory_multi_source_stage.py
 create mode 100644 tests/_utils/stages/multi_port_pass_thru.py

diff --git a/tests/_utils/stages/in_memory_multi_source_stage.py b/tests/_utils/stages/in_memory_multi_source_stage.py
new file mode 100644
index 0000000000..07726a5a94
--- /dev/null
+++ b/tests/_utils/stages/in_memory_multi_source_stage.py
@@ -0,0 +1,59 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import typing
+
+import mrc
+
+from morpheus.config import Config
+from morpheus.pipeline.source_stage import SourceStage
+from morpheus.pipeline.stage_schema import StageSchema
+
+
+class InMemoryMultiSourceStage(SourceStage):
+ """
+ In memory multi-source stage for testing purposes, accepts a 2d array `data`.
+ The first dimension represents the number of output ports, and the second holds the data for each port, which
+ is assumed to be of a consistent type per dimension. For example, it is acceptable for data[0] to be a list of
+ ints, and data[1] to be a list of strings.
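+
+ A sketch of the intended construction (illustrative only; `config` here is a standard morpheus `Config`):
+
+ >>> stage = InMemoryMultiSourceStage(config, [[1, 2, 3], ["a", "b", "c"]])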
+ """ + + def __init__(self, c: Config, data: list[list[typing.Any]]): + super().__init__(c) + self._create_ports(0, len(data)) + self._data = data + + @property + def name(self) -> str: + return "multi-in-memory-source" + + def supports_cpp_node(self) -> bool: + return False + + def compute_schema(self, schema: StageSchema): + assert len(self._data) == len(schema.output_schemas), "Number of output ports must match number of data arrays" + for (port_idx, port_schema) in enumerate(schema.output_schemas): + port_schema.set_type(type(self._data[port_idx][0])) + + def _emit_data(self) -> typing.Iterator[typing.Any]: + for x in self._data: + yield x + + def _build_sources(self, builder: mrc.Builder) -> list[mrc.SegmentObject]: + sources = [] + for _ in range(len(self._data)): + sources.append(builder.make_source(self.unique_name, self._emit_data())) + + return sources diff --git a/tests/_utils/stages/multi_port_pass_thru.py b/tests/_utils/stages/multi_port_pass_thru.py new file mode 100644 index 0000000000..48f57cec92 --- /dev/null +++ b/tests/_utils/stages/multi_port_pass_thru.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import typing + +import mrc +import mrc.core.operators as ops + +from morpheus.config import Config +from morpheus.pipeline.pass_thru_type_mixin import PassThruTypeMixin +from morpheus.pipeline.stage import Stage + + +class MultiPassThruStage(PassThruTypeMixin, Stage): + + def __init__(self, c: Config, num_ports: int): + super().__init__(c) + self._create_ports(num_ports, num_ports) + self.num_ports = num_ports + + @property + def name(self) -> str: + return "multi-pass-thru" + + def supports_cpp_node(self): + return False + + def on_data(self, message: typing.Any): + # Return the message for the next stage + return message + + def _build(self, builder: mrc.Builder, input_nodes: list[mrc.SegmentObject]) -> list[mrc.SegmentObject]: + assert self.num_ports == len(input_nodes) + + output_nodes = [] + + for input_node in input_nodes: + node = builder.make_node(self.unique_name, ops.map(self.on_data)) + builder.make_edge(input_node, node) + output_nodes.append(node) + + return output_nodes From a7627d1430208949078951417370e8afecf3ddf6 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 6 Oct 2023 12:23:46 -0700 Subject: [PATCH 41/65] WIP [no ci] --- .../stages/in_memory_multi_source_stage.py | 6 +-- tests/_utils/stages/multi_port_pass_thru.py | 4 +- tests/pipeline/test_stage_schema.py | 48 +++++++++++++------ 3 files changed, 37 insertions(+), 21 deletions(-) diff --git a/tests/_utils/stages/in_memory_multi_source_stage.py b/tests/_utils/stages/in_memory_multi_source_stage.py index 07726a5a94..ac009c74c1 100644 --- a/tests/_utils/stages/in_memory_multi_source_stage.py +++ b/tests/_utils/stages/in_memory_multi_source_stage.py @@ -52,8 +52,4 @@ def _emit_data(self) -> typing.Iterator[typing.Any]: yield x def _build_sources(self, builder: mrc.Builder) -> list[mrc.SegmentObject]: - sources = [] - for _ in range(len(self._data)): - sources.append(builder.make_source(self.unique_name, self._emit_data())) - - return sources + return [builder.make_source(self.unique_name, self._emit_data()) for _ in range(len(self._data))] diff --git a/tests/_utils/stages/multi_port_pass_thru.py b/tests/_utils/stages/multi_port_pass_thru.py index 48f57cec92..81187bce2f 100644 --- a/tests/_utils/stages/multi_port_pass_thru.py +++ b/tests/_utils/stages/multi_port_pass_thru.py @@ -47,8 +47,8 @@ def _build(self, builder: mrc.Builder, input_nodes: list[mrc.SegmentObject]) -> output_nodes = [] - for input_node in input_nodes: - node = builder.make_node(self.unique_name, ops.map(self.on_data)) + for (port_idx, input_node) in enumerate(input_nodes): + node = builder.make_node(f"{self.unique_name}_port_{port_idx}", ops.map(self.on_data)) builder.make_edge(input_node, node) output_nodes.append(node) diff --git a/tests/pipeline/test_stage_schema.py b/tests/pipeline/test_stage_schema.py index 2a2f3c5d7d..554a008c89 100644 --- a/tests/pipeline/test_stage_schema.py +++ b/tests/pipeline/test_stage_schema.py @@ -15,18 +15,18 @@ # limitations under the License. 
import pytest -import typing_utils from morpheus.config import Config from morpheus.messages import MessageMeta from morpheus.pipeline import Pipeline from morpheus.pipeline.base_stage import BaseStage from morpheus.pipeline.source_stage import SourceStage -from morpheus.pipeline.stage_schema import PortSchema from morpheus.pipeline.stage_schema import StageSchema from morpheus.stages.input.in_memory_source_stage import InMemorySourceStage from morpheus.stages.preprocess.deserialize_stage import DeserializeStage from _utils.dataset_manager import DatasetManager +from _utils.stages.in_memory_multi_source_stage import InMemoryMultiSourceStage +from _utils.stages.multi_port_pass_thru import MultiPassThruStage from _utils.stages.split_stage import SplitStage @@ -36,11 +36,20 @@ def in_mem_source_stage_fixture(config: Config, dataset_cudf: DatasetManager): yield InMemorySourceStage(config, [df]) +@pytest.fixture(name="in_mem_multi_source_stage") +def in_mem_multi_source_stage_fixture(config: Config): + data = [[1, 2, 3], ["a", "b", "c"], [1.1, 2.2, 3.3]] + yield InMemoryMultiSourceStage(config, data=data) + + def _build_ports(config: Config, source_stage: SourceStage, stage: BaseStage): pipe = Pipeline(config) pipe.add_stage(source_stage) pipe.add_stage(stage) - pipe.add_edge(source_stage, stage) + + for (port_idx, output_port) in enumerate(source_stage.output_ports): + pipe.add_edge(output_port, stage.input_ports[port_idx]) + pipe.build() @@ -52,16 +61,25 @@ def stage_fixture(config: Config, in_mem_source_stage: InMemorySourceStage): yield stage -@pytest.fixture(name="multiport_stage") -def multiport_stage_fixture(config: Config, in_mem_source_stage: InMemorySourceStage): +@pytest.fixture(name="split_stage") +def split_stage_fixture(config: Config, in_mem_source_stage: InMemorySourceStage): stage = SplitStage(config) _build_ports(config=config, source_stage=in_mem_source_stage, stage=stage) yield stage -@pytest.mark.parametrize("stage_fixture_name,num_inputs,num_outputs", [("in_mem_source_stage", 0, 1), ("stage", 1, 1), - ("multiport_stage", 1, 2)]) +@pytest.fixture(name="multi_pass_thru_stage") +def multi_pass_thru_stage_fixture(config: Config, in_mem_multi_source_stage: InMemoryMultiSourceStage): + stage = MultiPassThruStage(config, num_ports=3) + _build_ports(config=config, source_stage=in_mem_multi_source_stage, stage=stage) + + yield stage + + +@pytest.mark.parametrize("stage_fixture_name,num_inputs,num_outputs", + [("in_mem_source_stage", 0, 1), ("in_mem_multi_source_stage", 0, 3), ("stage", 1, 1), + ("split_stage", 1, 2), ("multi_pass_thru_stage", 3, 3)]) def test_constructor(request: pytest.FixtureRequest, stage_fixture_name: str, num_inputs: int, num_outputs: int): stage = request.getfixturevalue(stage_fixture_name) schema = StageSchema(stage) @@ -98,22 +116,24 @@ def test_single_port_output_schemas(in_mem_source_stage: InMemorySourceStage): assert schema.output_schema is port_schema -def test_multi_port_output_schemas(multiport_stage: SplitStage): - schema = StageSchema(multiport_stage) - multiport_stage.compute_schema(schema) +def test_multi_port_output_schemas(split_stage: SplitStage): + schema = StageSchema(split_stage) + split_stage.compute_schema(schema) assert len(schema.output_schemas) == 2 for port_schema in schema.output_schemas: assert port_schema.get_type() is MessageMeta -def test_output_schema_multi_error(multiport_stage: SplitStage): +@pytest.mark.parametrize("stage_fixture_name", ["split_stage", "multi_pass_thru_stage"]) +def test_output_schema_multi_error(request: 
pytest.FixtureRequest, stage_fixture_name: str): + """ + Test confirms that the output_schema property raises an error when there are multiple output schemas + """ + stage = request.getfixturevalue(stage_fixture_name) + schema = StageSchema(stage) + stage.compute_schema(schema) + assert len(schema.output_schemas) > 1 with pytest.raises(AssertionError): schema.output_schema From 17b7941dca9d747a1e5bbae74d270fad352c064b Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 6 Oct 2023 13:08:30 -0700 Subject: [PATCH 42/65] WIP tests [no ci] --- tests/pipeline/test_stage_schema.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/pipeline/test_stage_schema.py b/tests/pipeline/test_stage_schema.py index 554a008c89..c2f0ff0829 100644 --- a/tests/pipeline/test_stage_schema.py +++ b/tests/pipeline/test_stage_schema.py @@ -77,6 +77,7 @@ def multi_pass_thru_stage_fixture(config: Config, in_mem_multi_source_stage: InM yield stage +# Fixtures cannot be used directly as parametrize values, but we can fetch them by name @pytest.mark.parametrize("stage_fixture_name,num_inputs,num_outputs", [("in_mem_source_stage", 0, 1), ("in_mem_multi_source_stage", 0, 3), ("stage", 1, 1), ("split_stage", 1, 2), ("multi_pass_thru_stage", 3, 3)]) @@ -137,3 +138,20 @@ def test_output_schema_multi_error(request: pytest.FixtureRequest, stage_fixture assert len(schema.output_schemas) > 1 with pytest.raises(AssertionError): schema.output_schema + + +@pytest.mark.parametrize( + "stage_fixture_name", + ["in_mem_source_stage", "in_mem_multi_source_stage", "stage", "split_stage", "multi_pass_thru_stage"]) +def test_complete(request: pytest.FixtureRequest, stage_fixture_name: str): + stage = request.getfixturevalue(stage_fixture_name) + schema = StageSchema(stage) + stage.compute_schema(schema) + + for port_schema in schema.output_schemas: + assert not port_schema.is_complete() + + schema._complete() + + for port_schema in schema.output_schemas: + assert port_schema.is_complete() From 4dcaad317d3e80c42053c3edb5c79f3816e414be Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 6 Oct 2023 13:29:25 -0700 Subject: [PATCH 43/65] Move pipeline tests into tests/pipeline --- tests/{ => pipeline}/test_pipe_viz.py | 0 tests/{ => pipeline}/test_pipeline.py | 0 tests/{ => pipeline}/test_preallocation_pipe.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename tests/{ => pipeline}/test_pipe_viz.py (100%) rename tests/{ => pipeline}/test_pipeline.py (100%) rename tests/{ => pipeline}/test_preallocation_pipe.py (100%) diff --git a/tests/test_pipe_viz.py b/tests/pipeline/test_pipe_viz.py similarity index 100% rename from tests/test_pipe_viz.py rename to tests/pipeline/test_pipe_viz.py diff --git a/tests/test_pipeline.py b/tests/pipeline/test_pipeline.py similarity index 100% rename from tests/test_pipeline.py rename to tests/pipeline/test_pipeline.py diff --git a/tests/test_preallocation_pipe.py b/tests/pipeline/test_preallocation_pipe.py similarity index 100% rename from tests/test_preallocation_pipe.py rename to tests/pipeline/test_preallocation_pipe.py From 9ee5601ddcb8976b9d6600d514dc69910286e84e Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 6 Oct 2023 13:57:57 -0700 Subject: [PATCH 44/65] Assert that when stages are passed into the add_edge method they only have a single port [no ci] --- morpheus/pipeline/pipeline.py | 13 ++++ tests/_utils/stages/multi_port_pass_thru.py | 2 +- tests/pipeline/conftest.py | 75
+++++++++++++++++++++ tests/pipeline/test_pipeline.py | 36 ++++++++++ tests/pipeline/test_stage_schema.py | 54 --------------- 5 files changed, 125 insertions(+), 55 deletions(-) create mode 100644 tests/pipeline/conftest.py diff --git a/morpheus/pipeline/pipeline.py b/morpheus/pipeline/pipeline.py index 5acd1a0245..61c7f1e039 100644 --- a/morpheus/pipeline/pipeline.py +++ b/morpheus/pipeline/pipeline.py @@ -136,6 +136,7 @@ def add_edge(self, segment_id: str = "main"): """ Create an edge between two stages and add it to a segment in the pipeline. + When `start` and `end` are stages, they must have exactly one output and input port respectively. Parameters ---------- @@ -151,12 +152,24 @@ self._assert_not_built() if (isinstance(start, BaseStage)): + assert len(start.output_ports) > 0, \ + "Cannot call `add_edge` with a stage with no output ports as the `start` parameter" + assert len(start.output_ports) == 1, \ + ("Cannot call `add_edge` with a stage with multiple output ports as the `start` parameter, " + "instead `add_edge` must be called for each output port individually.") start_port = start.output_ports[0] + elif (isinstance(start, Sender)): start_port = start if (isinstance(end, Stage)): + assert len(end.input_ports) > 0, \ + "Cannot call `add_edge` with a stage with no input ports as the `end` parameter" + assert len(end.input_ports) == 1, \ + ("Cannot call `add_edge` with a stage with multiple input ports as the `end` parameter, " + "instead `add_edge` must be called for each input port individually.") end_port = end.input_ports[0] + elif (isinstance(end, Receiver)): end_port = end diff --git a/tests/_utils/stages/multi_port_pass_thru.py b/tests/_utils/stages/multi_port_pass_thru.py index 81187bce2f..d2d31bb909 100644 --- a/tests/_utils/stages/multi_port_pass_thru.py +++ b/tests/_utils/stages/multi_port_pass_thru.py @@ -24,7 +24,7 @@ from morpheus.pipeline.stage import Stage -class MultiPassThruStage(PassThruTypeMixin, Stage): +class MultiPortPassThruStage(PassThruTypeMixin, Stage): def __init__(self, c: Config, num_ports: int): super().__init__(c) diff --git a/tests/pipeline/conftest.py b/tests/pipeline/conftest.py new file mode 100644 index 0000000000..7f21ff0eff --- /dev/null +++ b/tests/pipeline/conftest.py @@ -0,0 +1,75 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
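+ +"""Shared fixtures for the tests under tests/pipeline; each fixture wires its stage into a small pipeline via the +`_build_ports` helper so that the stage's ports are built before the test runs."""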
+ +import pytest + +from _utils.dataset_manager import DatasetManager +from _utils.stages.in_memory_multi_source_stage import InMemoryMultiSourceStage +from _utils.stages.multi_port_pass_thru import MultiPortPassThruStage +from _utils.stages.split_stage import SplitStage +from morpheus.config import Config +from morpheus.pipeline import Pipeline +from morpheus.pipeline.base_stage import BaseStage +from morpheus.pipeline.source_stage import SourceStage +from morpheus.stages.input.in_memory_source_stage import InMemorySourceStage +from morpheus.stages.preprocess.deserialize_stage import DeserializeStage + + +@pytest.fixture(name="in_mem_source_stage") +def in_mem_source_stage_fixture(config: Config, dataset_cudf: DatasetManager): + df = dataset_cudf['filter_probs.csv'] + yield InMemorySourceStage(config, [df]) + + +@pytest.fixture(name="in_mem_multi_source_stage") +def in_mem_multi_source_stage_fixture(config: Config): + data = [[1, 2, 3], ["a", "b", "c"], [1.1, 2.2, 3.3]] + yield InMemoryMultiSourceStage(config, data=data) + + +def _build_ports(config: Config, source_stage: SourceStage, stage: BaseStage): + pipe = Pipeline(config) + pipe.add_stage(source_stage) + pipe.add_stage(stage) + + for (port_idx, output_port) in enumerate(source_stage.output_ports): + pipe.add_edge(output_port, stage.input_ports[port_idx]) + + pipe.build() + + +@pytest.fixture(name="stage") +def stage_fixture(config: Config, in_mem_source_stage: InMemorySourceStage): + stage = DeserializeStage(config) + _build_ports(config=config, source_stage=in_mem_source_stage, stage=stage) + + yield stage + + +@pytest.fixture(name="split_stage") +def split_stage_fixture(config: Config, in_mem_source_stage: InMemorySourceStage): + stage = SplitStage(config) + _build_ports(config=config, source_stage=in_mem_source_stage, stage=stage) + + yield stage + + +@pytest.fixture(name="multi_pass_thru_stage") +def multi_pass_thru_stage_fixture(config: Config, in_mem_multi_source_stage: InMemoryMultiSourceStage): + stage = MultiPortPassThruStage(config, num_ports=3) + _build_ports(config=config, source_stage=in_mem_multi_source_stage, stage=stage) + + yield stage diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py index fee3de0610..630c27950e 100755 --- a/tests/pipeline/test_pipeline.py +++ b/tests/pipeline/test_pipeline.py @@ -21,9 +21,13 @@ from _utils import assert_results from _utils.stages.conv_msg import ConvMsg +from _utils.stages.in_memory_source_x_stage import InMemSourceXStage +from _utils.stages.in_memory_multi_source_stage import InMemoryMultiSourceStage from _utils.stages.multi_message_pass_thru import MultiMessagePassThruStage +from _utils.stages.multi_port_pass_thru import MultiPortPassThruStage from morpheus.config import Config from morpheus.pipeline import LinearPipeline +from morpheus.pipeline import Pipeline from morpheus.stages.input.in_memory_source_stage import InMemorySourceStage from morpheus.stages.output.compare_dataframe_stage import CompareDataFrameStage from morpheus.stages.output.in_memory_sink_stage import InMemorySinkStage @@ -159,3 +163,35 @@ def test_pipeline_narrowing_types(config: Config, filter_probs_df: DataFrameType pipe.run() assert_results(compare_stage.get_results()) + + +@pytest.mark.parametrize("num_outputs", [0, 2, 3]) +def test_add_edge_output_port_errors(config: Config, num_outputs: int): + """ + Calling add_edge where start has either no output ports or multiple output ports should cause an assertion error. 
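+ Multi-port stages must instead be wired one port at a time, for example (an illustrative sketch mirroring the + `_build_ports` helper in conftest.py): + + pipe.add_edge(source_stage.output_ports[idx], stage.input_ports[idx])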
+ """ + data = [list(range(3)) for _ in range(num_outputs)] + start_stage = InMemoryMultiSourceStage(config, data=data) + + pipe = Pipeline(config) + pipe.add_stage(start_stage) + end_stage = pipe.add_stage(ConvMsg(config)) + + with pytest.raises(AssertionError): + pipe.add_edge(start_stage, end_stage.input_ports[0]) + + +@pytest.mark.parametrize("num_inputs", [0, 2, 3]) +def test_add_edge_input_port_errors(config: Config, num_inputs: int): + """ + Calling add_edge where end has either no input ports or multiple input ports should cause an assertion error. + """ + start_stage = InMemSourceXStage(config, data=list(range(3))) + end_stage = MultiPortPassThruStage(config, num_ports=num_inputs) + + pipe = Pipeline(config) + pipe.add_stage(start_stage) + pipe.add_stage(end_stage) + + with pytest.raises(AssertionError): + pipe.add_edge(start_stage.output_ports[0], end_stage) diff --git a/tests/pipeline/test_stage_schema.py b/tests/pipeline/test_stage_schema.py index c2f0ff0829..ec2ae19963 100644 --- a/tests/pipeline/test_stage_schema.py +++ b/tests/pipeline/test_stage_schema.py @@ -16,67 +16,13 @@ import pytest -from morpheus.config import Config from morpheus.messages import MessageMeta -from morpheus.pipeline import Pipeline -from morpheus.pipeline.base_stage import BaseStage -from morpheus.pipeline.source_stage import SourceStage from morpheus.pipeline.stage_schema import StageSchema from morpheus.stages.input.in_memory_source_stage import InMemorySourceStage from morpheus.stages.preprocess.deserialize_stage import DeserializeStage -from _utils.dataset_manager import DatasetManager -from _utils.stages.in_memory_multi_source_stage import InMemoryMultiSourceStage -from _utils.stages.multi_port_pass_thru import MultiPassThruStage from _utils.stages.split_stage import SplitStage -@pytest.fixture(name="in_mem_source_stage") -def in_mem_source_stage_fixture(config: Config, dataset_cudf: DatasetManager): - df = dataset_cudf['filter_probs.csv'] - yield InMemorySourceStage(config, [df]) - - -@pytest.fixture(name="in_mem_multi_source_stage") -def in_mem_multi_source_stage_fixture(config: Config): - data = [[1, 2, 3], ["a", "b", "c"], [1.1, 2.2, 3.3]] - yield InMemoryMultiSourceStage(config, data=data) - - -def _build_ports(config: Config, source_stage: SourceStage, stage: BaseStage): - pipe = Pipeline(config) - pipe.add_stage(source_stage) - pipe.add_stage(stage) - - for (port_idx, output_port) in enumerate(source_stage.output_ports): - pipe.add_edge(output_port, stage.input_ports[port_idx]) - - pipe.build() - - -@pytest.fixture(name="stage") -def stage_fixture(config: Config, in_mem_source_stage: InMemorySourceStage): - stage = DeserializeStage(config) - _build_ports(config=config, source_stage=in_mem_source_stage, stage=stage) - - yield stage - - -@pytest.fixture(name="split_stage") -def split_stage_fixture(config: Config, in_mem_source_stage: InMemorySourceStage): - stage = SplitStage(config) - _build_ports(config=config, source_stage=in_mem_source_stage, stage=stage) - - yield stage - - -@pytest.fixture(name="multi_pass_thru_stage") -def multi_pass_thru_stage_fixture(config: Config, in_mem_multi_source_stage: InMemoryMultiSourceStage): - stage = MultiPassThruStage(config, num_ports=3) - _build_ports(config=config, source_stage=in_mem_multi_source_stage, stage=stage) - - yield stage - - # Fixtures cannot be used directly as paramertize values, but we can fetch them by name @pytest.mark.parametrize("stage_fixture_name,num_inputs,num_outputs", [("in_mem_source_stage", 0, 1), 
("in_mem_multi_source_stage", 0, 3), ("stage", 1, 1), From 65ff2c5bd953ceef4fe218b0c0b429c192c646dd Mon Sep 17 00:00:00 2001 From: David Gardner Date: Fri, 6 Oct 2023 14:20:39 -0700 Subject: [PATCH 45/65] Remove unused imports [no ci] --- tests/test_databricks_deltalake_source_stage.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/test_databricks_deltalake_source_stage.py b/tests/test_databricks_deltalake_source_stage.py index bbe364f896..2c48521001 100644 --- a/tests/test_databricks_deltalake_source_stage.py +++ b/tests/test_databricks_deltalake_source_stage.py @@ -14,10 +14,6 @@ from unittest.mock import patch -import pytest - -import cudf - from _utils import assert_results from _utils.dataset_manager import DatasetManager from morpheus.config import Config From f3a7fa105f07ac00fa08848624d5dbf7b4b76de5 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 9 Oct 2023 08:56:35 -0700 Subject: [PATCH 46/65] Remove TODO comment, will resolve docs in separate PR --- examples/developer_guide/1_simple_python_stage/pass_thru.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/developer_guide/1_simple_python_stage/pass_thru.py b/examples/developer_guide/1_simple_python_stage/pass_thru.py index b3daad6307..0ef237faa6 100644 --- a/examples/developer_guide/1_simple_python_stage/pass_thru.py +++ b/examples/developer_guide/1_simple_python_stage/pass_thru.py @@ -23,7 +23,6 @@ from morpheus.pipeline.single_port_stage import SinglePortStage -# TODO: Document the PassThruTypeMixin @register_stage("pass-thru") class PassThruStage(PassThruTypeMixin, SinglePortStage): """ From 18438cd3f782fbaef503efb3fc3779897ffa5dfa Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 9 Oct 2023 08:56:57 -0700 Subject: [PATCH 47/65] Ignore pylint warning --- tests/pipeline/test_stage_schema.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipeline/test_stage_schema.py b/tests/pipeline/test_stage_schema.py index ec2ae19963..133e45ce90 100644 --- a/tests/pipeline/test_stage_schema.py +++ b/tests/pipeline/test_stage_schema.py @@ -83,7 +83,7 @@ def test_output_schema_multi_error(request: pytest.FixtureRequest, stage_fixture assert len(schema.output_schemas) > 1 with pytest.raises(AssertionError): - schema.output_schema + schema.output_schema # pylint: disable=pointless-statement @pytest.mark.parametrize( From e0a0ea2d2a0de82a84a511247b2eef2ec7095e98 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Mon, 9 Oct 2023 08:58:04 -0700 Subject: [PATCH 48/65] Fix import sorting --- morpheus/pipeline/pipeline.py | 2 +- tests/pipeline/test_pipeline.py | 2 +- tests/pipeline/test_stage_schema.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/morpheus/pipeline/pipeline.py b/morpheus/pipeline/pipeline.py index 61c7f1e039..9395272385 100644 --- a/morpheus/pipeline/pipeline.py +++ b/morpheus/pipeline/pipeline.py @@ -29,12 +29,12 @@ import cudf from morpheus.config import Config +from morpheus.pipeline.base_stage import BaseStage from morpheus.pipeline.preallocator_mixin import PreallocatorMixin from morpheus.pipeline.receiver import Receiver from morpheus.pipeline.sender import Sender from morpheus.pipeline.source_stage import SourceStage from morpheus.pipeline.stage import Stage -from morpheus.pipeline.base_stage import BaseStage from morpheus.utils.type_utils import pretty_print_type_name logger = logging.getLogger(__name__) diff --git a/tests/pipeline/test_pipeline.py b/tests/pipeline/test_pipeline.py index 630c27950e..40ca0b9612 100755 --- 
a/tests/pipeline/test_pipeline.py +++ b/tests/pipeline/test_pipeline.py @@ -21,8 +21,8 @@ from _utils import assert_results from _utils.stages.conv_msg import ConvMsg -from _utils.stages.in_memory_source_x_stage import InMemSourceXStage from _utils.stages.in_memory_multi_source_stage import InMemoryMultiSourceStage +from _utils.stages.in_memory_source_x_stage import InMemSourceXStage from _utils.stages.multi_message_pass_thru import MultiMessagePassThruStage from _utils.stages.multi_port_pass_thru import MultiPortPassThruStage from morpheus.config import Config diff --git a/tests/pipeline/test_stage_schema.py b/tests/pipeline/test_stage_schema.py index 133e45ce90..30965d7537 100644 --- a/tests/pipeline/test_stage_schema.py +++ b/tests/pipeline/test_stage_schema.py @@ -16,11 +16,11 @@ import pytest +from _utils.stages.split_stage import SplitStage from morpheus.messages import MessageMeta from morpheus.pipeline.stage_schema import StageSchema from morpheus.stages.input.in_memory_source_stage import InMemorySourceStage from morpheus.stages.preprocess.deserialize_stage import DeserializeStage -from _utils.stages.split_stage import SplitStage # Fixtures cannot be used directly as parametrize values, but we can fetch them by name From 0772f87963989cd12214bf4adf6fa127701b969f Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Wed, 11 Oct 2023 06:55:01 -0700 Subject: [PATCH 49/65] Update to clang-16 & boost-1.82 (#1186) * Silence deprecation warnings from SimpleAmqpClient #1255 fixes #1185 Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Devin Robison (https://github.com/drobison00) URL: https://github.com/nv-morpheus/Morpheus/pull/1186 --- ci/conda/recipes/morpheus/conda_build_config.yaml | 2 +- ci/scripts/common.sh | 4 ++-- docker/conda/environments/cuda11.8_dev.yml | 6 +++--- .../cmake/Configure_SimpleAmqpClient.cmake | 5 +++-- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/ci/conda/recipes/morpheus/conda_build_config.yaml b/ci/conda/recipes/morpheus/conda_build_config.yaml index b4d6fe33d0..f6bfd9e8d5 100644 --- a/ci/conda/recipes/morpheus/conda_build_config.yaml +++ b/ci/conda/recipes/morpheus/conda_build_config.yaml @@ -29,7 +29,7 @@ python: - 3.10 boost: - - 1.74 + - 1.82 rapids_version: - 23.06 diff --git a/ci/scripts/common.sh b/ci/scripts/common.sh index 4847041f15..d942aec853 100644 --- a/ci/scripts/common.sh +++ b/ci/scripts/common.sh @@ -43,8 +43,8 @@ export SKIP_YAPF=${SKIP_YAPF:-""} # Set BUILD_DIR to use a different build folder export BUILD_DIR=${BUILD_DIR:-"${MORPHEUS_ROOT}/build"} -# Speficy the clang-tools version to use. Default 14 -export CLANG_TOOLS_VERSION=${CLANG_TOOLS_VERSION:-14} +# Specify the clang-tools version to use. Default 16 +export CLANG_TOOLS_VERSION=${CLANG_TOOLS_VERSION:-16} # Returns the `branch-YY.MM` that is used as the base for merging function get_base_branch() { diff --git a/docker/conda/environments/cuda11.8_dev.yml b/docker/conda/environments/cuda11.8_dev.yml index 66a2d11448..2df2b75fa4 100644 --- a/docker/conda/environments/cuda11.8_dev.yml +++ b/docker/conda/environments/cuda11.8_dev.yml @@ -25,10 +25,10 @@ dependencies: ####### Morpheus Dependencies (keep sorted!)
####### - automake=1.16.5 - benchmark=1.6.1 - - boost-cpp=1.74 + - boost-cpp=1.82 - cachetools=5.0.0 - ccache>=3.7 - - clangdev=14 + - clangdev=16 - click >=8 - cmake=3.24 - configargparse=1.5 @@ -58,7 +58,7 @@ dependencies: - grpcio - gtest>=1.13.0 - gxx_linux-64=11.2 - - include-what-you-use=0.18 + - include-what-you-use=0.20 - ipywidgets - isort - jupyter_core>=4.11.2,<5.0 diff --git a/examples/developer_guide/4_rabbitmq_cpp_stage/cmake/Configure_SimpleAmqpClient.cmake b/examples/developer_guide/4_rabbitmq_cpp_stage/cmake/Configure_SimpleAmqpClient.cmake index 1ba113026b..7aa248e8e1 100644 --- a/examples/developer_guide/4_rabbitmq_cpp_stage/cmake/Configure_SimpleAmqpClient.cmake +++ b/examples/developer_guide/4_rabbitmq_cpp_stage/cmake/Configure_SimpleAmqpClient.cmake @@ -41,8 +41,9 @@ function(find_and_configure_SimpleAmqpClient version) # Needed to pick up the generated export.h target_include_directories(SimpleAmqpClient PUBLIC "${rabbitmq_BINARY_DIR}/include") - # Suppress #warning deprecation messages from rabbitmq - target_compile_options(SimpleAmqpClient PRIVATE -Wno-cpp) + # Suppress #warning deprecation messages from rabbitmq and SimpleAmqpClient + # https://github.com/nv-morpheus/Morpheus/issues/1255 + target_compile_options(SimpleAmqpClient PRIVATE -Wno-cpp -DBOOST_DISABLE_PRAGMA_MESSAGE) endfunction() From 20afe305d3ac32c7ab29253d6557dbf97eb8172f Mon Sep 17 00:00:00 2001 From: David Gardner Date: Wed, 11 Oct 2023 07:12:37 -0700 Subject: [PATCH 50/65] Revert unintentional change --- docs/source/developer_guide/guides/3_simple_cpp_stage.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/source/developer_guide/guides/3_simple_cpp_stage.md b/docs/source/developer_guide/guides/3_simple_cpp_stage.md index 90710a97a9..f3caa45213 100644 --- a/docs/source/developer_guide/guides/3_simple_cpp_stage.md +++ b/docs/source/developer_guide/guides/3_simple_cpp_stage.md @@ -336,11 +336,13 @@ PYBIND11_MODULE(morpheus_example, m) ### Python Changes -We need to make a few minor adjustments to our Python implementation of the `PassThruStage`. +We need to make a few minor adjustments to our Python implementation of the `PassThruStage`. First, we import the new `morpheus_example` Python module we created in the previous section. -As mentioned in the previous section, we will need to change the return value of the `supports_cpp_node` method to indicate that our stage now supports a C++ implementation. Our `_build_single` method needs to be updated to build a C++ node when `morpheus.config.CppConfig.get_should_use_cpp()` is `True` using the `self._build_cpp_node()` method. The `_build_cpp_node()` method compares both `morpheus.config.CppConfig.get_should_use_cpp()` and `supports_cpp_node()` and returns `True` only when both methods return `True`. +```python +from _lib import morpheus_example as morpheus_example_cpp +``` - We lazily import the new `morpheus_example` Python module we created in the previous section only when we need to build a C++ node since importing a compiled extension module comes at a performance cost. +As mentioned in the previous section, we will need to change the return value of the `supports_cpp_node` method to indicate that our stage now supports a C++ implementation. Our `_build_single` method needs to be updated to build a C++ node when `morpheus.config.CppConfig.get_should_use_cpp()` is `True` using the `self._build_cpp_node()` method. 
The `_build_cpp_node()` method compares both `morpheus.config.CppConfig.get_should_use_cpp()` and `supports_cpp_node()` and returns `True` only when both methods return `True`. ```python def supports_cpp_node(self): @@ -349,7 +351,6 @@ def supports_cpp_node(self): ```python def _build_single(self, builder: mrc.Builder, input_stream: StreamPair) -> StreamPair: if self._build_cpp_node(): - from _lib import morpheus_example as morpheus_example_cpp node = morpheus_example_cpp.PassThruStage(builder, self.unique_name) else: node = builder.make_node(self.unique_name, ops.map(self.on_data)) From 6ad39c8cb2355ed8cc1e74499ee2aa8c0352fc04 Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Wed, 11 Oct 2023 09:36:51 -0600 Subject: [PATCH 51/65] Updating versions for v23.07.03 --- CMakeLists.txt | 2 +- examples/digital_fingerprinting/production/Dockerfile | 2 +- examples/digital_fingerprinting/production/docker-compose.yml | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2d0b7716c0..f59d305fed 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -98,7 +98,7 @@ morpheus_utils_initialize_cuda_arch(morpheus) rapids_cmake_build_type(Release) # Project definition project(morpheus - VERSION 23.07.00 + VERSION 23.07.03 LANGUAGES C CXX CUDA) rapids_cmake_write_version_file(${CMAKE_BINARY_DIR}/autogenerated/include/morpheus/version.hpp) diff --git a/examples/digital_fingerprinting/production/Dockerfile b/examples/digital_fingerprinting/production/Dockerfile index c33291cc6a..778b63b7cd 100644 --- a/examples/digital_fingerprinting/production/Dockerfile +++ b/examples/digital_fingerprinting/production/Dockerfile @@ -14,7 +14,7 @@ # limitations under the License. ARG MORPHEUS_CONTAINER=nvcr.io/nvidia/morpheus/morpheus -ARG MORPHEUS_CONTAINER_VERSION=v23.07.00-runtime +ARG MORPHEUS_CONTAINER_VERSION=v23.07.03-runtime FROM ${MORPHEUS_CONTAINER}:${MORPHEUS_CONTAINER_VERSION} as base diff --git a/examples/digital_fingerprinting/production/docker-compose.yml b/examples/digital_fingerprinting/production/docker-compose.yml index d9bf53badb..0263444f5b 100644 --- a/examples/digital_fingerprinting/production/docker-compose.yml +++ b/examples/digital_fingerprinting/production/docker-compose.yml @@ -77,7 +77,7 @@ services: target: jupyter args: - MORPHEUS_CONTAINER=${MORPHEUS_CONTAINER:-nvcr.io/nvidia/morpheus/morpheus} - - MORPHEUS_CONTAINER_VERSION=${MORPHEUS_CONTAINER_VERSION:-v23.07.00-runtime} + - MORPHEUS_CONTAINER_VERSION=${MORPHEUS_CONTAINER_VERSION:-v23.07.03-runtime} deploy: resources: reservations: @@ -109,7 +109,7 @@ services: target: runtime args: - MORPHEUS_CONTAINER=${MORPHEUS_CONTAINER:-nvcr.io/nvidia/morpheus/morpheus} - - MORPHEUS_CONTAINER_VERSION=${MORPHEUS_CONTAINER_VERSION:-v23.07.00-runtime} + - MORPHEUS_CONTAINER_VERSION=${MORPHEUS_CONTAINER_VERSION:-v23.07.03-runtime} image: dfp_morpheus container_name: morpheus_pipeline deploy: From 82b291a7ab12435f9a9c3da283b29a4c39b63d5c Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Wed, 11 Oct 2023 09:42:40 -0600 Subject: [PATCH 52/65] Updating CHANGELOG --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7ac9de50f..aaca14e549 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,13 @@ See the License for the specific language governing permissions and limitations under the License. 
--> +# Morpheus 23.07.03 (11 Oct 2023) + +## 🐛 Bug Fixes +- Add pinned libwebp to resolve CVE ([#1236](https://github.com/nv-morpheus/Morpheus/pull/1236)) [@drobison00](https://github.com/drobison00) +- Add libwebp to meta.yaml for CVE 2307 ([#1242](https://github.com/nv-morpheus/Morpheus/pull/1242)) [@drobison00](https://github.com/drobison00) +- [BUG] Fix Control Message Utils & SQL Max Connections Exhaust ([#1243](https://github.com/nv-morpheus/Morpheus/pull/1243)) [@bsuryadevara](https://github.com/bsuryadevara) + # Morpheus 23.07.02 (25 Jul 2023) ## 🐛 Bug Fixes From 5443da9b10642fb4095927648887ac9040489e3b Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Wed, 11 Oct 2023 22:32:43 +0000 Subject: [PATCH 53/65] Fixing an issue with the incorrect version being used to register a module from C++ --- morpheus/_lib/modules/module.cpp | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/morpheus/_lib/modules/module.cpp b/morpheus/_lib/modules/module.cpp index a63bb75aa0..b315b321d8 100644 --- a/morpheus/_lib/modules/module.cpp +++ b/morpheus/_lib/modules/module.cpp @@ -19,9 +19,11 @@ #include "morpheus/utilities/string_util.hpp" #include "morpheus/version.hpp" +#include #include #include #include // for arg, init, class_, module_, str_attr_accessor, PYBIND11_MODULE, pybind11 +#include #include #include @@ -39,11 +41,19 @@ PYBIND11_MODULE(modules, _module) )pbdoc"; - const std::vector MorpheusModuleVersion{ - morpheus_VERSION_MAJOR, morpheus_VERSION_MINOR, morpheus_VERSION_PATCH}; + // Get the MRC version that we are registering these modules for. Ideally, this would be able to get it directly + // from the MRC version header, but that file isn't exported + std::vector mrc_version; - mrc::modules::ModelRegistryUtil::create_registered_module( - "DataLoader", "morpheus", MorpheusModuleVersion); + auto mrc_version_list = pybind11::module_::import("mrc").attr("__version__").attr("split")(".").cast(); + + for (const auto& l : mrc_version_list) + { + auto i = py::int_(py::reinterpret_borrow(l)); + mrc_version.push_back(i.cast()); + } + + mrc::modules::ModelRegistryUtil::create_registered_module("DataLoader", "morpheus", mrc_version); _module.attr("__version__") = MORPHEUS_CONCAT_STR(morpheus_VERSION_MAJOR << "." << morpheus_VERSION_MINOR << "." << morpheus_VERSION_PATCH); From 06bda2d0dd62379273e34a01a8199c50da2e7f5d Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Wed, 11 Oct 2023 23:35:09 +0000 Subject: [PATCH 54/65] IWYU fixes --- morpheus/_lib/modules/module.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/morpheus/_lib/modules/module.cpp b/morpheus/_lib/modules/module.cpp index b315b321d8..33e0de09d3 100644 --- a/morpheus/_lib/modules/module.cpp +++ b/morpheus/_lib/modules/module.cpp @@ -19,12 +19,13 @@ #include "morpheus/utilities/string_util.hpp" #include "morpheus/version.hpp" -#include #include #include +#include // for object_api::operator(), object::cast #include // for arg, init, class_, module_, str_attr_accessor, PYBIND11_MODULE, pybind11 #include +#include // for array #include #include From e92e6984f2e95b8db4080c567f9a161fa3179ce3 Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Thu, 12 Oct 2023 19:27:36 -0700 Subject: [PATCH 55/65] Loosen nodejs version restriction (#1262) * Set nodejs version to 18.* Closes #1259 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md).
- When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1262 --- docker/conda/environments/cuda11.8_dev.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/conda/environments/cuda11.8_dev.yml b/docker/conda/environments/cuda11.8_dev.yml index 2df2b75fa4..7c1a034038 100644 --- a/docker/conda/environments/cuda11.8_dev.yml +++ b/docker/conda/environments/cuda11.8_dev.yml @@ -70,7 +70,7 @@ dependencies: - mrc=23.11 - networkx=3.1 - ninja=1.10 - - nodejs=18.15.0 + - nodejs=18.* - numba>=0.56.2 - numpydoc=1.4 - nvtabular=23.06 From 502d923296829be38d7132fd016b6147f989c005 Mon Sep 17 00:00:00 2001 From: Michael Demoret Date: Fri, 13 Oct 2023 10:43:11 -0600 Subject: [PATCH 56/65] Updating out of date version --- examples/digital_fingerprinting/production/docker-compose.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/digital_fingerprinting/production/docker-compose.yml b/examples/digital_fingerprinting/production/docker-compose.yml index 7b17d7509f..fc2aa2edd2 100644 --- a/examples/digital_fingerprinting/production/docker-compose.yml +++ b/examples/digital_fingerprinting/production/docker-compose.yml @@ -109,7 +109,7 @@ services: target: runtime args: - MORPHEUS_CONTAINER=${MORPHEUS_CONTAINER:-nvcr.io/nvidia/morpheus/morpheus} - - MORPHEUS_CONTAINER_VERSION=${MORPHEUS_CONTAINER_VERSION:-v23.07.00-runtime} + - MORPHEUS_CONTAINER_VERSION=${MORPHEUS_CONTAINER_VERSION:-v23.11.00-runtime} image: dfp_morpheus container_name: morpheus_pipeline deploy: From 9348842941c298de2e1d61264ac5634a57071986 Mon Sep 17 00:00:00 2001 From: David Gardner <96306125+dagardner-nv@users.noreply.github.com> Date: Fri, 13 Oct 2023 10:20:05 -0700 Subject: [PATCH 57/65] Fix documentation for morpheus.loaders.sql_loader (#1264) Add `@wraps` decorator to `cm_skip_processing_if_failed` to allow docstrings for wrapped methods to generate documentation. ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - David Gardner (https://github.com/dagardner-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1264 --- morpheus/utils/control_message_utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/morpheus/utils/control_message_utils.py b/morpheus/utils/control_message_utils.py index 7d6d7a9254..a600e3ec97 100644 --- a/morpheus/utils/control_message_utils.py +++ b/morpheus/utils/control_message_utils.py @@ -37,6 +37,7 @@ def cm_skip_processing_if_failed(func: Callable[CM_SKIP_P, T]) -> Callable[CM_SK The decorated function. 
""" + @wraps(func) def wrapper(control_message: ControlMessage, *args: CM_SKIP_P.args, **kwargs: CM_SKIP_P.kwargs) -> T: if (control_message.has_metadata("cm_failed") and control_message.get_metadata("cm_failed")): return control_message From c32090333212f91d4d1315ce3c3a0497cd315756 Mon Sep 17 00:00:00 2001 From: Bhargav Suryadevara Date: Fri, 13 Oct 2023 12:44:31 -0500 Subject: [PATCH 58/65] Add a Vector Database Service to allow stages to read and write to VDBs (#1225) Closes #1177 ## By Submitting this PR I confirm: - I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md). - When the PR is ready for review, new or existing tests cover these changes. - When the PR is ready for review, the documentation is up to date with these changes. Authors: - Bhargav Suryadevara (https://github.com/bsuryadevara) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1225 --- docker/conda/environments/cuda11.8_dev.yml | 2 + docs/source/conf.py | 1 + morpheus/service/__init__.py | 13 + morpheus/service/milvus_client.py | 268 ++++++++ morpheus/service/milvus_vector_db_service.py | 600 ++++++++++++++++++ morpheus/service/vector_db_service.py | 323 ++++++++++ morpheus/stages/output/write_to_vector_db.py | 123 ++++ morpheus/utils/vector_db_service_utils.py | 55 ++ tests/conftest.py | 28 + tests/test_milvus_vector_db_service.py | 428 +++++++++++++ ...st_milvus_write_to_vector_db_stage_pipe.py | 127 ++++ .../milvus_idx_part_collection_conf.json | 3 + .../milvus_simple_collection_conf.json | 3 + 13 files changed, 1974 insertions(+) create mode 100644 morpheus/service/__init__.py create mode 100644 morpheus/service/milvus_client.py create mode 100644 morpheus/service/milvus_vector_db_service.py create mode 100644 morpheus/service/vector_db_service.py create mode 100644 morpheus/stages/output/write_to_vector_db.py create mode 100644 morpheus/utils/vector_db_service_utils.py create mode 100644 tests/test_milvus_vector_db_service.py create mode 100755 tests/test_milvus_write_to_vector_db_stage_pipe.py create mode 100644 tests/tests_data/service/milvus_idx_part_collection_conf.json create mode 100644 tests/tests_data/service/milvus_simple_collection_conf.json diff --git a/docker/conda/environments/cuda11.8_dev.yml b/docker/conda/environments/cuda11.8_dev.yml index 7c1a034038..4a7dc39688 100644 --- a/docker/conda/environments/cuda11.8_dev.yml +++ b/docker/conda/environments/cuda11.8_dev.yml @@ -110,3 +110,5 @@ dependencies: # Add additional dev dependencies here - databricks-connect - pytest-kafka==0.6.0 + - pymilvus==2.3.1 + - milvus diff --git a/docs/source/conf.py b/docs/source/conf.py index adb924692f..ff3f98be38 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -169,6 +169,7 @@ "morpheus.cli.commands", # Dont document the CLI in Sphinx "nvtabular", "pandas", + "pymilvus", "tensorrt", "torch", "tqdm", diff --git a/morpheus/service/__init__.py b/morpheus/service/__init__.py new file mode 100644 index 0000000000..ce94db52fa --- /dev/null +++ b/morpheus/service/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/morpheus/service/milvus_client.py b/morpheus/service/milvus_client.py new file mode 100644 index 0000000000..ff2956a93c --- /dev/null +++ b/morpheus/service/milvus_client.py @@ -0,0 +1,268 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import typing + +from pymilvus import Collection +from pymilvus import DataType +from pymilvus import MilvusClient as PyMilvusClient +from pymilvus.orm.mutation import MutationResult + +# Milvus data type mapping dictionary +MILVUS_DATA_TYPE_MAP = { + "int8": DataType.INT8, + "int16": DataType.INT16, + "int32": DataType.INT32, + "int64": DataType.INT64, + "bool": DataType.BOOL, + "float": DataType.FLOAT, + "double": DataType.DOUBLE, + "binary_vector": DataType.BINARY_VECTOR, + "float_vector": DataType.FLOAT_VECTOR, + "string": DataType.STRING, + "varchar": DataType.VARCHAR, + "json": DataType.JSON, +} + + +def handle_exceptions(func_name: str, error_message: str) -> typing.Callable: + """ + Decorator function to handle exceptions and raise a descriptive `RuntimeError`. + + Parameters + ---------- + func_name : str + Name of the function being decorated. + error_message : str + Error message to include in the raised exception. + + Returns + ------- + typing.Callable + Decorated function. + """ + + def decorator(func): + + def wrapper(*args, **kwargs): + try: + return func(*args, **kwargs) + except Exception as ex: + raise RuntimeError(f"{error_message} - Failed to execute {func_name}") from ex + + return wrapper + + return decorator + + +class MilvusClient(PyMilvusClient): + """ + Extension of the `MilvusClient` class with custom functions. + + Parameters + ---------- + uri : str + URI for connecting to the Milvus server. + user : str + User name for authentication. + password : str + Password for authentication. + db_name : str + Name of the Milvus database. + token : str + Token for authentication. + **kwargs : dict[str, typing.Any] + Additional keyword arguments for the MilvusClient constructor. + """ + + def __init__(self, uri: str, user: str, password: str, db_name: str, token: str, **kwargs: dict[str, typing.Any]): + super().__init__(uri=uri, user=user, password=password, db_name=db_name, token=token, **kwargs) + + @handle_exceptions("has_collection", "Error checking collection existence") + def has_collection(self, collection_name: str) -> bool: + """ + Check if a collection exists in the database. + + Parameters + ---------- + collection_name : str + Name of the collection to check. + + Returns + ------- + bool + True if the collection exists, False otherwise.
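+ + Example (an illustrative sketch; `client` is assumed to be a MilvusClient instance): + + if client.has_collection("my_collection"): + client.load_collection("my_collection")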
+ """ + conn = self._get_connection() + return conn.has_collection(collection_name) + + @handle_exceptions("create_partition", "Error creating partition") + def create_partition(self, collection_name: str, partition_name: str, timeout: float = 1.0) -> None: + """ + Create a partition within a collection. + + Parameters + ---------- + collection_name : str + Name of the collection. + partition_name : str + Name of the partition to create. + timeout : float, optional + Timeout for the operation in seconds (default is 1.0). + """ + conn = self._get_connection() + conn.create_partition(collection_name=collection_name, partition_name=partition_name, timeout=timeout) + + @handle_exceptions("load_collection", "Error loading collection") + def load_collection(self, collection_name: str) -> None: + """ + Load a collection into memory. + + Parameters + ---------- + collection_name : str + Name of the collection to load. + """ + conn = self._get_connection() + conn.load_collection(collection_name=collection_name) + + @handle_exceptions("release_collection", "Error releasing collection") + def release_collection(self, collection_name: str) -> None: + """ + Release a loaded collection from memory. + + Parameters + ---------- + collection_name : str + Name of the collection to release. + """ + conn = self._get_connection() + conn.release_collection(collection_name=collection_name) + + @handle_exceptions("upsert", "Error upserting collection entities") + def upsert(self, collection_name: str, entities: list, **kwargs: dict[str, typing.Any]) -> MutationResult: + """ + Upsert entities into a collection. + + Parameters + ---------- + collection_name : str + Name of the collection to upsert into. + entities : list + List of entities to upsert. + **kwargs : dict[str, typing.Any] + Additional keyword arguments for the upsert operation. + + Returns + ------- + MutationResult + Result of the upsert operation. + """ + conn = self._get_connection() + return conn.upsert(collection_name=collection_name, entities=entities, **kwargs) + + @handle_exceptions("delete_by_expr", "Error deleting collection entities") + def delete_by_expr(self, collection_name: str, expression: str, **kwargs: dict[str, typing.Any]) -> MutationResult: + """ + Delete entities from a collection using an expression. + + Parameters + ---------- + collection_name : str + Name of the collection to delete from. + expression : str + Deletion expression. + **kwargs : dict[str, typing.Any] + Additional keyword arguments for the delete operation. + + Returns + ------- + MutationResult + Returns result of delete operation. + """ + conn = self._get_connection() + return conn.delete(collection_name=collection_name, expression=expression, **kwargs) + + @handle_exceptions("has_partition", "Error checking partition existence") + def has_partition(self, collection_name: str, partition_name: str) -> bool: + """ + Check if a partition exists within a collection. + + Parameters + ---------- + collection_name : str + Name of the collection. + partition_name : str + Name of the partition to check. + + Returns + ------- + bool + True if the partition exists, False otherwise. + """ + conn = self._get_connection() + return conn.has_partition(collection_name=collection_name, partition_name=partition_name) + + @handle_exceptions("drop_partition", "Error dropping partition") + def drop_partition(self, collection_name: str, partition_name: str) -> None: + """ + Drop a partition from a collection. 
+ + Parameters + ---------- + collection_name : str + Name of the collection. + partition_name : str + Name of the partition to drop. + """ + conn = self._get_connection() + conn.drop_partition(collection_name=collection_name, partition_name=partition_name) + + @handle_exceptions("drop_index", "Error dropping index") + def drop_index(self, collection_name: str, field_name: str, index_name: str) -> None: + """ + Drop an index from a collection. + + Parameters + ---------- + collection_name : str + Name of the collection. + field_name : str + Name of the field associated with the index. + index_name : str + Name of the index to drop. + """ + conn = self._get_connection() + conn.drop_index(collection_name=collection_name, field_name=field_name, index_name=index_name) + + @handle_exceptions("get_collection", "Error getting collection object") + def get_collection(self, collection_name: str, **kwargs: dict[str, typing.Any]) -> Collection: + """ + Returns `Collection` object associated with the given collection name. + + Parameters + ---------- + collection_name : str + Name of the collection to retrieve. + **kwargs : dict[str, typing.Any] + Additional keyword arguments used to construct the Collection instance. + + Returns + ------- + Collection + Returns pymilvus Collection instance. + """ + collection = Collection(name=collection_name, using=self._using, **kwargs) + + return collection diff --git a/morpheus/service/milvus_vector_db_service.py b/morpheus/service/milvus_vector_db_service.py new file mode 100644 index 0000000000..18ae5dd4a2 --- /dev/null +++ b/morpheus/service/milvus_vector_db_service.py @@ -0,0 +1,600 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import copy +import logging +import threading +import time +import typing + +import pandas as pd +import pymilvus +from pymilvus.orm.mutation import MutationResult + +import cudf + +from morpheus.service.milvus_client import MILVUS_DATA_TYPE_MAP +from morpheus.service.milvus_client import MilvusClient +from morpheus.service.vector_db_service import VectorDBService + +logger = logging.getLogger(__name__) + + +def with_collection_lock(func: typing.Callable) -> typing.Callable: + """ + A decorator to synchronize access to a collection with a lock. This decorator ensures that operations on a + specific collection within the Milvus Vector Database are synchronized by acquiring and + releasing a collection-specific lock. + + Parameters + ---------- + func : Callable + The function to be wrapped with the lock. + + Returns + ------- + Callable + The wrapped function with the lock acquisition logic.
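+ + A usage sketch (illustrative; the decorated method's first positional argument after `self` must be the + collection name): + + @with_collection_lock + def drop(self, name: str, **kwargs): + ...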
+ """ + + def wrapper(self, name, *args, **kwargs): + collection_lock = MilvusVectorDBService.get_collection_lock(name) + with collection_lock: + logger.debug("Acquiring lock for collection: %s", name) + result = func(self, name, *args, **kwargs) + logger.debug("Releasing lock for collection: %s", name) + return result + + return wrapper + + +class MilvusVectorDBService(VectorDBService): + """ + Service class for Milvus Vector Database implementation. This class provides functions for interacting + with a Milvus vector database. + + Parameters + ---------- + host : str + The hostname or IP address of the Milvus server. + port : str + The port number for connecting to the Milvus server. + alias : str, optional + Alias for the Milvus connection, by default "default". + **kwargs : dict + Additional keyword arguments specific to the Milvus connection configuration. + """ + + _collection_locks = {} + _cleanup_interval = 600 # 10mins + _last_cleanup_time = time.time() + + def __init__(self, + uri: str, + user: str = "", + password: str = "", + db_name: str = "", + token: str = "", + **kwargs: dict[str, typing.Any]): + + self._client = MilvusClient(uri=uri, user=user, password=password, db_name=db_name, token=token, **kwargs) + + def has_store_object(self, name: str) -> bool: + """ + Check if a collection exists in the Milvus vector database. + + Parameters + ---------- + name : str + Name of the collection to check. + + Returns + ------- + bool + True if the collection exists, False otherwise. + """ + return self._client.has_collection(collection_name=name) + + def list_store_objects(self, **kwargs: dict[str, typing.Any]) -> list[str]: + """ + List the names of all collections in the Milvus vector database. + + Returns + ------- + list[str] + A list of collection names. + """ + return self._client.list_collections(**kwargs) + + def _create_schema_field(self, field_conf: dict) -> pymilvus.FieldSchema: + + name = field_conf.pop("name") + dtype = field_conf.pop("dtype") + + dtype = MILVUS_DATA_TYPE_MAP[dtype.lower()] + + field_schema = pymilvus.FieldSchema(name=name, dtype=dtype, **field_conf) + + return field_schema + + @with_collection_lock + def create(self, name: str, overwrite: bool = False, **kwargs: dict[str, typing.Any]): + """ + Create a collection in the Milvus vector database with the specified name and configuration. This method + creates a new collection in the Milvus vector database with the provided name and configuration options. + If the collection already exists, it can be overwritten if the `overwrite` parameter is set to True. + + Parameters + ---------- + name : str + Name of the collection to be created. + overwrite : bool, optional + If True, the collection will be overwritten if it already exists, by default False. + **kwargs : dict + Additional keyword arguments containing collection configuration. + + Raises + ------ + ValueError + If the provided schema fields configuration is empty. + """ + logger.debug("Creating collection: %s, overwrite=%s, kwargs=%s", name, overwrite, kwargs) + # Preserve original configuration. 
+        kwargs = copy.deepcopy(kwargs)
+
+        collection_conf = kwargs.get("collection_conf")
+        auto_id = collection_conf.get("auto_id", False)
+        index_conf = collection_conf.get("index_conf", None)
+        partition_conf = collection_conf.get("partition_conf", None)
+
+        schema_conf = collection_conf.get("schema_conf")
+        schema_fields_conf = schema_conf.pop("schema_fields")
+
+        index_param = {}
+
+        if not self.has_store_object(name) or overwrite:
+            if overwrite and self.has_store_object(name):
+                self.drop(name)
+
+            if len(schema_fields_conf) == 0:
+                raise ValueError("Cannot create collection as provided empty schema_fields configuration")
+
+            schema_fields = [self._create_schema_field(field_conf=field_conf) for field_conf in schema_fields_conf]
+
+            schema = pymilvus.CollectionSchema(fields=schema_fields, **schema_conf)
+
+            if index_conf:
+                field_name = index_conf.pop("field_name")
+                metric_type = index_conf.pop("metric_type")
+                index_param = self._client.prepare_index_params(field_name=field_name,
+                                                                metric_type=metric_type,
+                                                                **index_conf)
+
+            self._client.create_collection_with_schema(collection_name=name,
+                                                       schema=schema,
+                                                       index_param=index_param,
+                                                       auto_id=auto_id,
+                                                       shards_num=collection_conf.get("shards", 2),
+                                                       consistency_level=collection_conf.get(
+                                                           "consistency_level", "Strong"))
+
+            if partition_conf:
+                timeout = partition_conf.get("timeout", 1.0)
+                # Iterate over each partition configuration
+                for part in partition_conf["partitions"]:
+                    self._client.create_partition(collection_name=name, partition_name=part["name"], timeout=timeout)
+
+    @with_collection_lock
+    def insert(self, name: str, data: list[list] | list[dict], **kwargs: dict[str,
+                                                                              typing.Any]) -> dict[str, typing.Any]:
+        """
+        Insert data into a specific collection in the Milvus vector database.
+
+        Parameters
+        ----------
+        name : str
+            Name of the collection to insert into.
+        data : list[list] | list[dict]
+            Data to be inserted in the collection.
+        **kwargs : dict[str, typing.Any]
+            Additional keyword arguments containing collection configuration.
+
+        Returns
+        -------
+        dict
+            Returns response content as a dictionary.
+
+        Raises
+        ------
+        RuntimeError
+            If the collection does not exist.
+        """
+
+        return self._collection_insert(name, data, **kwargs)
+
+    def _collection_insert(self, name: str, data: list[list] | list[dict],
+                           **kwargs: dict[str, typing.Any]) -> dict[str, typing.Any]:
+
+        if not self.has_store_object(name):
+            raise RuntimeError(f"Collection {name} doesn't exist.")
+
+        collection = None
+        try:
+            collection_conf = kwargs.get("collection_conf", {})
+            partition_name = collection_conf.get("partition_name", "_default")
+
+            collection = self._client.get_collection(collection_name=name, **collection_conf)
+            result = collection.insert(data, partition_name=partition_name)
+            collection.flush()
+        finally:
+            # Guard against get_collection failing before `collection` is assigned.
+            if collection is not None:
+                collection.release()
+
+        result_dict = {
+            "primary_keys": result.primary_keys,
+            "insert_count": result.insert_count,
+            "delete_count": result.delete_count,
+            "upsert_count": result.upsert_count,
+            "timestamp": result.timestamp,
+            "succ_count": result.succ_count,
+            "err_count": result.err_count,
+            "succ_index": result.succ_index,
+            "err_index": result.err_index
+        }
+
+        return result_dict
+
+    @with_collection_lock
+    def insert_dataframe(self,
+                         name: str,
+                         df: typing.Union[cudf.DataFrame, pd.DataFrame],
+                         **kwargs: dict[str, typing.Any]) -> dict[str, typing.Any]:
+        """
+        Converts a dataframe to rows and inserts them into a collection in the Milvus vector database.
+
+        Parameters
+        ----------
+        name : str
+            Name of the collection to insert into.
+        df : typing.Union[cudf.DataFrame, pd.DataFrame]
+            Dataframe to be inserted in the collection.
+        **kwargs : dict[str, typing.Any]
+            Additional keyword arguments containing collection configuration.
+
+        Returns
+        -------
+        dict
+            Returns response content as a dictionary.
+
+        Raises
+        ------
+        RuntimeError
+            If the collection does not exist.
+        """
+        if not self.has_store_object(name):
+            raise RuntimeError(f"Collection {name} doesn't exist.")
+
+        if isinstance(df, cudf.DataFrame):
+            df = df.to_pandas()
+
+        dict_of_rows = df.to_dict(orient='records')
+
+        return self._collection_insert(name, dict_of_rows, **kwargs)
+
+    @with_collection_lock
+    def search(self, name: str, query: str = None, **kwargs: dict[str, typing.Any]) -> typing.Any:
+        """
+        Search for data in a collection in the Milvus vector database.
+
+        This method performs a search operation in the specified collection/partition in the Milvus vector database.
+
+        Parameters
+        ----------
+        name : str
+            Name of the collection to search within.
+        query : str, optional
+            The search query, which can be a filter expression, by default None.
+        **kwargs : dict
+            Additional keyword arguments for the search operation.
+
+        Returns
+        -------
+        typing.Any
+            The search result, which can vary depending on the query and options.
+
+        Raises
+        ------
+        RuntimeError
+            If an error occurs during the search operation.
+            If the query argument is `None` and the `data` keyword argument doesn't exist.
+            If the `data` keyword argument is `None`.
+        """
+
+        logger.debug("Searching in collection: %s, query=%s, kwargs=%s", name, query, kwargs)
+
+        try:
+            self._client.load_collection(collection_name=name)
+            if query is not None:
+                result = self._client.query(collection_name=name, filter=query, **kwargs)
+            else:
+                if "data" not in kwargs:
+                    raise RuntimeError("The search operation requires that search vectors be " +
+                                       "provided as a keyword argument 'data'")
+                if kwargs["data"] is None:
+                    raise RuntimeError("Argument 'data' cannot be None")
+
+                data = kwargs.pop("data")
+
+                result = self._client.search(collection_name=name, data=data, **kwargs)
+            return result
+
+        except pymilvus.exceptions.MilvusException as exec_info:
+            raise RuntimeError(f"Unable to perform search: {exec_info}") from exec_info
+
+        finally:
+            self._client.release_collection(collection_name=name)
+
+    @with_collection_lock
+    def update(self, name: str, data: list[typing.Any], **kwargs: dict[str, typing.Any]) -> dict[str, typing.Any]:
+        """
+        Update data in the vector database.
+
+        Parameters
+        ----------
+        name : str
+            Name of the resource.
+        data : list[typing.Any]
+            Data to be updated in the collection.
+        **kwargs : dict[str, typing.Any]
+            Extra keyword arguments specific to the upsert operation.
+
+        Returns
+        -------
+        dict[str, typing.Any]
+            Returns stats of the update operation.
+        """
+
+        if not isinstance(data, list):
+            raise RuntimeError("Data is not of type list.")
+
+        result = self._client.upsert(collection_name=name, entities=data, **kwargs)
+
+        return self._convert_mutation_result_to_dict(result=result)
+
+    @with_collection_lock
+    def delete_by_keys(self, name: str, keys: int | str | list, **kwargs: dict[str, typing.Any]) -> typing.Any:
+        """
+        Delete vectors by keys from the resource.
+
+        Parameters
+        ----------
+        name : str
+            Name of the resource.
+        keys : int | str | list
+            Primary keys to delete vectors.
+        **kwargs : dict[str, typing.Any]
+            Extra keyword arguments specific to the vector database implementation.
+
+        Returns
+        -------
+        typing.Any
+            Returns the result of deleting the given keys from the collection.
+        """
+
+        result = self._client.delete(collection_name=name, pks=keys, **kwargs)
+
+        return result
+
+    @with_collection_lock
+    def delete(self, name: str, expr: str, **kwargs: dict[str, typing.Any]) -> dict[str, typing.Any]:
+        """
+        Delete vectors from the resource using expressions.
+
+        Parameters
+        ----------
+        name : str
+            Name of the resource.
+        expr : str
+            Delete expression.
+        **kwargs : dict[str, typing.Any]
+            Extra keyword arguments specific to the vector database implementation.
+
+        Returns
+        -------
+        dict[str, typing.Any]
+            Returns stats of the delete operation.
+        """
+
+        result = self._client.delete_by_expr(collection_name=name, expression=expr, **kwargs)
+
+        return self._convert_mutation_result_to_dict(result=result)
+
+    @with_collection_lock
+    def retrieve_by_keys(self, name: str, keys: int | str | list, **kwargs: dict[str, typing.Any]) -> list[typing.Any]:
+        """
+        Retrieve the inserted vectors using their primary keys from the Collection.
+
+        Parameters
+        ----------
+        name : str
+            Name of the collection.
+        keys : int | str | list
+            Primary keys to get vectors for. Depending on pk_field type it can be int or str
+            or a list of either.
+        **kwargs : dict[str, typing.Any]
+            Additional keyword arguments for the retrieval operation.
+
+        Returns
+        -------
+        list[typing.Any]
+            Returns result rows of the given keys from the collection.
+        """
+
+        result = None
+
+        try:
+            self._client.load_collection(collection_name=name)
+            result = self._client.get(collection_name=name, ids=keys, **kwargs)
+        except pymilvus.exceptions.MilvusException as exec_info:
+            raise RuntimeError(f"Unable to perform retrieval: {exec_info}") from exec_info
+
+        finally:
+            self._client.release_collection(collection_name=name)
+
+        return result
+
+    def count(self, name: str, **kwargs: dict[str, typing.Any]) -> int:
+        """
+        Returns number of rows/entities in the given collection.
+
+        Parameters
+        ----------
+        name : str
+            Name of the collection.
+        **kwargs : dict[str, typing.Any]
+            Additional keyword arguments for the count operation.
+
+        Returns
+        -------
+        int
+            Returns number of entities in the collection.
+        """
+
+        return self._client.num_entities(collection_name=name, **kwargs)
+
+    def drop(self, name: str, **kwargs: dict[str, typing.Any]) -> None:
+        """
+        Drop a collection, index, or partition in the Milvus vector database.
+
+        This method allows you to drop a collection, an index within a collection,
+        or a specific partition within a collection in the Milvus vector database.
+
+        Parameters
+        ----------
+        name : str
+            Name of the collection, index, or partition to be dropped.
+        **kwargs : dict
+            Additional keyword arguments for specifying the type and partition name (if applicable).
+
+        Notes on Expected Keyword Arguments:
+        ------------------------------------
+        - 'resource' (str, optional):
+            Specifies the type of resource to drop. Possible values: 'collection' (default), 'index', 'partition'.
+
+        - 'partition_name' (str, optional):
+            Required when dropping a specific partition within a collection. Specifies the partition name to be dropped.
+
+        - 'field_name' (str, optional):
+            Required when dropping an index within a collection. Specifies the field name for which the index is created.
+
+        - 'index_name' (str, optional):
+            Required when dropping an index within a collection. Specifies the name of the index to be dropped.
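+
+        Example (illustrative; the partition and index names here are the ones used by the
+        tests added in this change)::
+
+            service.drop("my_collection", resource="partition", partition_name="age_partition")
+            service.drop("my_collection", resource="index", field_name="embedding", index_name="_default_idx_")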
+
+        Raises
+        ------
+        ValueError
+            If mandatory arguments are missing or if the provided 'resource' value is invalid.
+        """
+
+        logger.debug("Dropping collection: %s, kwargs=%s", name, kwargs)
+
+        if self.has_store_object(name):
+            resource = kwargs.get("resource", "collection")
+            if resource == "collection":
+                self._client.drop_collection(collection_name=name)
+            elif resource == "partition":
+                if "partition_name" not in kwargs:
+                    raise ValueError("Mandatory argument 'partition_name' is required when resource='partition'")
+                partition_name = kwargs["partition_name"]
+                if self._client.has_partition(collection_name=name, partition_name=partition_name):
+                    self._client.drop_partition(collection_name=name, partition_name=partition_name)
+            elif resource == "index":
+                if "field_name" in kwargs and "index_name" in kwargs:
+                    self._client.drop_index(collection_name=name,
+                                            field_name=kwargs["field_name"],
+                                            index_name=kwargs["index_name"])
+                else:
+                    raise ValueError(
+                        "Mandatory arguments 'field_name' and 'index_name' are required when resource='index'")
+            else:
+                raise ValueError(f"Invalid 'resource' value: {resource}")
+
+    def describe(self, name: str, **kwargs: dict[str, typing.Any]) -> dict:
+        """
+        Describe the collection in the vector database.
+
+        Parameters
+        ----------
+        name : str
+            Name of the collection.
+        **kwargs : dict[str, typing.Any]
+            Additional keyword arguments specific to the Milvus vector database.
+
+        Returns
+        -------
+        dict
+            Returns collection information.
+        """
+
+        return self._client.describe_collection(collection_name=name, **kwargs)
+
+    def close(self) -> None:
+        """
+        Close the connection to the Milvus vector database.
+
+        This method disconnects from the Milvus vector database by removing the connection.
+        """
+        self._client.close()
+
+    def _convert_mutation_result_to_dict(self, result: MutationResult) -> dict[str, typing.Any]:
+        result_dict = {
+            "insert_count": result.insert_count,
+            "delete_count": result.delete_count,
+            "upsert_count": result.upsert_count,
+            "timestamp": result.timestamp,
+            "succ_count": result.succ_count,
+            "err_count": result.err_count
+        }
+        return result_dict
+
+    @classmethod
+    def get_collection_lock(cls, name: str) -> threading.Lock:
+        """
+        Get a lock for a given collection name.
+
+        Parameters
+        ----------
+        name : str
+            Name of the collection for which to acquire the lock.
+
+        Returns
+        -------
+        threading.Lock
+            A thread lock specific to the given collection name.
+        """
+
+        current_time = time.time()
+
+        if name not in cls._collection_locks:
+            cls._collection_locks[name] = {"lock": threading.Lock(), "last_used": current_time}
+        else:
+            cls._collection_locks[name]["last_used"] = current_time
+
+        if (current_time - cls._last_cleanup_time) >= cls._cleanup_interval:
+            for lock_name, lock_info in cls._collection_locks.copy().items():
+                last_used = lock_info["last_used"]
+                if current_time - last_used >= cls._cleanup_interval:
+                    logger.debug("Cleaning up lock for collection: %s", lock_name)
+                    del cls._collection_locks[lock_name]
+            cls._last_cleanup_time = current_time
+
+        return cls._collection_locks[name]["lock"]
diff --git a/morpheus/service/vector_db_service.py b/morpheus/service/vector_db_service.py
new file mode 100644
index 0000000000..650c7860e8
--- /dev/null
+++ b/morpheus/service/vector_db_service.py
@@ -0,0 +1,323 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import typing
+from abc import ABC
+from abc import abstractmethod
+
+import pandas as pd
+
+import cudf
+
+logger = logging.getLogger(__name__)
+
+
+class VectorDBService(ABC):
+    """
+    Abstract base class for vector database (vectorstore) specific implementations.
+    """
+
+    @abstractmethod
+    def insert(self, name: str, data: list[list] | list[dict], **kwargs: dict[str, typing.Any]) -> dict:
+        """
+        Insert data into the vector database.
+
+        Parameters
+        ----------
+        name : str
+            Name of the resource.
+        data : list[list] | list[dict]
+            Data to be inserted into the resource.
+        **kwargs : dict[str, typing.Any]
+            Extra keyword arguments specific to the vector database implementation.
+
+        Returns
+        -------
+        dict
+            Returns response content as a dictionary.
+        """
+
+        pass
+
+    @abstractmethod
+    def insert_dataframe(self,
+                         name: str,
+                         df: typing.Union[cudf.DataFrame, pd.DataFrame],
+                         **kwargs: dict[str, typing.Any]) -> dict:
+        """
+        Converts a dataframe to rows and inserts them into the vector database resource.
+
+        Parameters
+        ----------
+        name : str
+            Name of the resource to insert into.
+        df : typing.Union[cudf.DataFrame, pd.DataFrame]
+            Dataframe to be inserted.
+        **kwargs : dict[str, typing.Any]
+            Additional keyword arguments containing collection configuration.
+
+        Returns
+        -------
+        dict
+            Returns response content as a dictionary.
+
+        Raises
+        ------
+        RuntimeError
+            If the resource does not exist.
+        """
+        pass
+
+    @abstractmethod
+    def search(self, name: str, query: str = None, **kwargs: dict[str, typing.Any]) -> typing.Any:
+        """
+        Search for content in the vector database.
+
+        Parameters
+        ----------
+        name : str
+            Name of the resource.
+        query : str, default None
+            Query to execute on the given resource.
+        **kwargs : dict[str, typing.Any]
+            Extra keyword arguments specific to the vector database implementation.
+
+        Returns
+        -------
+        typing.Any
+            Returns search results.
+        """
+
+        pass
+
+    @abstractmethod
+    def drop(self, name: str, **kwargs: dict[str, typing.Any]) -> None:
+        """
+        Drop resources from the vector database.
+
+        Parameters
+        ----------
+        name : str
+            Name of the resource.
+        **kwargs : dict[str, typing.Any]
+            Extra keyword arguments specific to the vector database implementation.
+        """
+
+        pass
+
+    @abstractmethod
+    def update(self, name: str, data: list[typing.Any], **kwargs: dict[str, typing.Any]) -> dict[str, typing.Any]:
+        """
+        Update data in the vector database.
+
+        Parameters
+        ----------
+        name : str
+            Name of the resource.
+        data : list[typing.Any]
+            Data to be updated in the resource.
+        **kwargs : dict[str, typing.Any]
+            Extra keyword arguments specific to the vector database implementation.
+
+        Returns
+        -------
+        dict[str, typing.Any]
+            Returns stats of the update operation.
+        """
+
+        pass
+
+    @abstractmethod
+    def delete(self, name: str, expr: str, **kwargs: dict[str, typing.Any]) -> dict[str, typing.Any]:
+        """
+        Delete data in the vector database.
+
+        Parameters
+        ----------
+        name : str
+            Name of the resource.
+        expr : str
+            Delete expression.
+        **kwargs : dict[str, typing.Any]
+            Extra keyword arguments specific to the vector database implementation.
+
+        Returns
+        -------
+        dict[str, typing.Any]
+            Returns stats of the delete operation.
+        """
+
+        pass
+
+    @abstractmethod
+    def create(self, name: str, overwrite: bool = False, **kwargs: dict[str, typing.Any]) -> None:
+        """
+        Create resources in the vector database.
+
+        Parameters
+        ----------
+        name : str
+            Name of the resource.
+        overwrite : bool, default False
+            Whether to overwrite the resource if it already exists.
+        **kwargs : dict[str, typing.Any]
+            Extra keyword arguments specific to the vector database implementation.
+        """
+
+        pass
+
+    @abstractmethod
+    def describe(self, name: str, **kwargs: dict[str, typing.Any]) -> dict:
+        """
+        Describe resource in the vector database.
+
+        Parameters
+        ----------
+        name : str
+            Name of the resource.
+        **kwargs : dict[str, typing.Any]
+            Extra keyword arguments specific to the vector database implementation.
+
+        Returns
+        -------
+        dict
+            Returns resource information.
+        """
+
+        pass
+
+    @abstractmethod
+    def close(self) -> None:
+        """
+        Close connection to the vector database.
+        """
+
+        pass
+
+    @abstractmethod
+    def has_store_object(self, name: str) -> bool:
+        """
+        Check if a resource exists in the vector database.
+
+        Parameters
+        ----------
+        name : str
+            Name of the resource.
+
+        Returns
+        -------
+        bool
+            Returns True if the resource exists in the vector database, otherwise False.
+        """
+
+        pass
+
+    @abstractmethod
+    def list_store_objects(self, **kwargs: dict[str, typing.Any]) -> list[str]:
+        """
+        List existing resources in the vector database.
+
+        Parameters
+        ----------
+        **kwargs : dict[str, typing.Any]
+            Extra keyword arguments specific to the vector database implementation.
+
+        Returns
+        -------
+        list[str]
+            Returns available resource names in the vector database.
+        """
+
+        pass
+
+    # pylint: disable=unused-argument
+    def transform(self, data: typing.Any, **kwargs: dict[str, typing.Any]) -> typing.Any:
+        """
+        Transform data according to the specific vector database implementation.
+
+        Parameters
+        ----------
+        data : typing.Any
+            Data to be transformed.
+        **kwargs : dict[str, typing.Any]
+            Extra keyword arguments specific to the vector database implementation.
+
+        Returns
+        -------
+        typing.Any
+            Returns transformed data as per the implementation.
+        """
+        return data
+
+    @abstractmethod
+    def retrieve_by_keys(self, name: str, keys: int | str | list, **kwargs: dict[str, typing.Any]) -> list[typing.Any]:
+        """
+        Retrieve the inserted vectors using keys from the resource.
+
+        Parameters
+        ----------
+        name : str
+            Name of the resource.
+        keys : int | str | list
+            Primary keys of the vectors to retrieve.
+        **kwargs : dict[str, typing.Any]
+            Extra keyword arguments specific to the vector database implementation.
+
+        Returns
+        -------
+        list[typing.Any]
+            Returns rows for the given keys that exist in the resource.
+        """
+        pass
+
+    @abstractmethod
+    def delete_by_keys(self, name: str, keys: int | str | list, **kwargs: dict[str, typing.Any]) -> typing.Any:
+        """
+        Delete vectors by keys from the resource.
+
+        Parameters
+        ----------
+        name : str
+            Name of the resource.
+        keys : int | str | list
+            Primary keys to delete vectors.
+        **kwargs : dict[str, typing.Any]
+            Extra keyword arguments specific to the vector database implementation.
+
+        Returns
+        -------
+        typing.Any
+            Returns the result of deleting the given keys from the resource.
+        """
+        pass
+
+    @abstractmethod
+    def count(self, name: str, **kwargs: dict[str, typing.Any]) -> int:
+        """
+        Returns number of rows/entities in the given resource.
+
+        Parameters
+        ----------
+        name : str
+            Name of the resource.
+        **kwargs : dict[str, typing.Any]
+            Extra keyword arguments specific to the vector database implementation.
+
+        Returns
+        -------
+        int
+            Returns number of rows/entities in the given resource.
+        """
+        pass
diff --git a/morpheus/stages/output/write_to_vector_db.py b/morpheus/stages/output/write_to_vector_db.py
new file mode 100644
index 0000000000..3fef0fcd66
--- /dev/null
+++ b/morpheus/stages/output/write_to_vector_db.py
@@ -0,0 +1,123 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import typing
+
+import mrc
+from mrc.core import operators as ops
+
+from morpheus.config import Config
+from morpheus.messages import ControlMessage
+from morpheus.pipeline.single_port_stage import SinglePortStage
+from morpheus.pipeline.stream_pair import StreamPair
+from morpheus.service.vector_db_service import VectorDBService
+from morpheus.utils.vector_db_service_utils import VectorDBServiceFactory
+
+logger = logging.getLogger(__name__)
+
+
+class WriteToVectorDBStage(SinglePortStage):
+    """
+    Writes messages to a Vector Database.
+
+    Parameters
+    ----------
+    config : `morpheus.config.Config`
+        Pipeline configuration instance.
+    resource_name : str
+        The name of the resource managed by this instance.
+    service : typing.Union[str, VectorDBService]
+        Either the name of the vector database service to use or an instance of VectorDBService
+        for managing the resource.
+    **service_kwargs : dict[str, typing.Any]
+        Additional keyword arguments to pass when creating a VectorDBService instance. A
+        `resource_kwargs` dictionary may be included here; it is extracted by the stage and
+        forwarded to each insert call.
+
+    Raises
+    ------
+    ValueError
+        If `service` is not a valid string (service name) or an instance of VectorDBService.
+    """
+
+    def __init__(self,
+                 config: Config,
+                 resource_name: str,
+                 service: typing.Union[str, VectorDBService],
+                 **service_kwargs: dict[str, typing.Any]):
+
+        super().__init__(config)
+
+        self._resource_name = resource_name
+        self._resource_kwargs = {}
+
+        if "resource_kwargs" in service_kwargs:
+            self._resource_kwargs = service_kwargs.pop("resource_kwargs")
+
+        if isinstance(service, str):
+            # If service is a string, assume it's the service name
+            self._service: VectorDBService = VectorDBServiceFactory.create_instance(service_name=service,
+                                                                                    **service_kwargs)
+        elif isinstance(service, VectorDBService):
+            # If service is an instance of VectorDBService, use it directly
+            self._service: VectorDBService = service
+        else:
+            raise ValueError("service must be a string (service name) or an instance of VectorDBService")
+
+    @property
+    def name(self) -> str:
+        return "to-vector-db"
+
+    def accepted_types(self) -> typing.Tuple:
+        """
+        Returns accepted input types for this stage.
+
+        Returns
+        -------
+        typing.Tuple(`morpheus.messages.ControlMessage`, )
+            Accepted input types.
+
+        """
+        return (ControlMessage, )
+
+    def supports_cpp_node(self):
+        """Indicates whether this stage supports a C++ node."""
+        return False
+
+    def on_completed(self):
+        # Close vector database service connection
+        self._service.close()
+
+    def _build_single(self, builder: mrc.Builder, input_stream: StreamPair) -> StreamPair:
+
+        stream = input_stream[0]
+
+        def on_data(ctrl_msg: ControlMessage) -> ControlMessage:
+            # Insert entries in the dataframe to vector database.
+            result = self._service.insert_dataframe(name=self._resource_name,
+                                                    df=ctrl_msg.payload().df,
+                                                    **self._resource_kwargs)
+
+            ctrl_msg.set_metadata("insert_response", result)
+
+            return ctrl_msg
+
+        to_vector_db = builder.make_node(self.unique_name, ops.map(on_data), ops.on_completed(self.on_completed))
+
+        builder.make_edge(stream, to_vector_db)
+        stream = to_vector_db
+
+        # Return the new node along with the upstream message type; messages pass through unchanged.
+        return stream, input_stream[1]
diff --git a/morpheus/utils/vector_db_service_utils.py b/morpheus/utils/vector_db_service_utils.py
new file mode 100644
index 0000000000..fa9145941f
--- /dev/null
+++ b/morpheus/utils/vector_db_service_utils.py
@@ -0,0 +1,55 @@
+# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import importlib
+import typing
+
+
+class VectorDBServiceFactory:
+    """
+    Factory for creating instances of vector database service classes. This factory allows dynamically
+    creating instances of vector database service classes based on the provided service name.
+    Each service name corresponds to a specific implementation class.
+
+    Parameters
+    ----------
+    service_name : str
+        The name of the vector database service to create.
+    *args : typing.Any
+        Variable-length argument list to pass to the service constructor.
+    **kwargs : dict[str, typing.Any]
+        Arbitrary keyword arguments to pass to the service constructor.
+
+    Returns
+    -------
+    An instance of the specified vector database service class.
+
+    Raises
+    ------
+    ValueError
+        If the specified service name is not found or does not correspond to a valid service class.
+    """
+
+    @classmethod
+    def create_instance(cls, service_name: str, *args: typing.Any, **kwargs: dict[str, typing.Any]):
+        try:
+            module_name = f"morpheus.service.{service_name}_vector_db_service"
+            module = importlib.import_module(module_name)
+            class_name = f"{service_name.capitalize()}VectorDBService"
+            class_ = getattr(module, class_name)
+            instance = class_(*args, **kwargs)
+            return instance
+        except (ModuleNotFoundError, AttributeError) as exc:
+            raise ValueError(f"Service {service_name} not found. Ensure that the corresponding service module, "
+                             f"{module_name}, has been implemented.") from exc
diff --git a/tests/conftest.py b/tests/conftest.py
index e3df4500c5..425f604701 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -937,3 +937,31 @@ def filter_probs_df(dataset, use_cpp: bool):
     that as well, while excluding the combination of C++ execution and Pandas dataframes.
     """
     yield dataset["filter_probs.csv"]
+
+
+@pytest.fixture(scope="session")
+def milvus_server_uri():
+    """
+    Pytest fixture to start and stop a Milvus server and provide its URI for testing.
+
+    This fixture starts a Milvus server, retrieves its URI (Uniform Resource Identifier), and provides
+    the URI as a yield value to the tests using this fixture. After all tests in the session are
+    completed, the Milvus server is stopped.
+    """
+    from milvus import default_server
+
+    logger = logging.getLogger(f"morpheus.{__name__}")
+    try:
+        default_server.start()
+        host = "127.0.0.1"
+        port = default_server.listen_port
+        uri = f"http://{host}:{port}"
+
+        yield uri
+    except Exception as exec_inf:
+        logger.error("Error in starting Milvus server: %s", exec_inf)
+    finally:
+        try:
+            default_server.stop()
+        except Exception as exec_inf:
+            logger.error("Error in stopping Milvus server: %s", exec_inf)
diff --git a/tests/test_milvus_vector_db_service.py b/tests/test_milvus_vector_db_service.py
new file mode 100644
index 0000000000..28a3d646ed
--- /dev/null
+++ b/tests/test_milvus_vector_db_service.py
@@ -0,0 +1,428 @@
+#!/usr/bin/env python
+# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
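+
+# NOTE: These tests assume a locally started Milvus test server; its URI is supplied by the
+# session-scoped `milvus_server_uri` fixture added to tests/conftest.py in this change.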
+
+import concurrent.futures
+import json
+import os
+
+import numpy as np
+import pytest
+
+from _utils import TEST_DIRS
+from morpheus.service.milvus_client import MILVUS_DATA_TYPE_MAP
+from morpheus.service.milvus_vector_db_service import MilvusVectorDBService
+
+
+@pytest.fixture(scope="module", name="milvus_service_fixture")
+def milvus_service(milvus_server_uri: str):
+    service = MilvusVectorDBService(uri=milvus_server_uri)
+    yield service
+
+
+def load_json(filename):
+    conf_filepath = os.path.join(TEST_DIRS.tests_data_dir, "service", filename)
+
+    with open(conf_filepath, 'r', encoding="utf-8") as json_file:
+        collection_config = json.load(json_file)
+
+    return collection_config
+
+
+@pytest.fixture(scope="module", name="data_fixture")
+def data():
+    initial_data = [{"id": i, "embedding": [i / 10.0] * 10, "age": 25 + i} for i in range(10)]
+    yield initial_data
+
+
+@pytest.fixture(scope="module", name="idx_part_collection_config_fixture")
+def idx_part_collection_config():
+    collection_config = load_json(filename="milvus_idx_part_collection_conf.json")
+    yield collection_config
+
+
+@pytest.fixture(scope="module", name="simple_collection_config_fixture")
+def simple_collection_config():
+    collection_config = load_json(filename="milvus_simple_collection_conf.json")
+    yield collection_config
+
+
+@pytest.mark.slow
+def test_list_store_objects(milvus_service_fixture: MilvusVectorDBService):
+    # List all collections in the Milvus server.
+    collections = milvus_service_fixture.list_store_objects()
+    assert isinstance(collections, list)
+
+
+@pytest.mark.slow
+def test_has_store_object(milvus_service_fixture: MilvusVectorDBService):
+    # Check that a non-existing collection is reported as absent from the Milvus server.
+    collection_name = "non_existing_collection"
+    assert not milvus_service_fixture.has_store_object(collection_name)
+
+
+@pytest.mark.slow
+def test_create_and_drop_collection(milvus_service_fixture: MilvusVectorDBService,
+                                    idx_part_collection_config_fixture: dict):
+    # Create a collection and check if it exists.
+    collection_name = "test_collection"
+    milvus_service_fixture.create(collection_name, **idx_part_collection_config_fixture)
+    assert milvus_service_fixture.has_store_object(collection_name)
+
+    # Drop the collection and check if it no longer exists.
+    milvus_service_fixture.drop(collection_name)
+    assert not milvus_service_fixture.has_store_object(collection_name)
+
+
+@pytest.mark.slow
+def test_insert_and_retrieve_by_keys(milvus_service_fixture: MilvusVectorDBService,
+                                     idx_part_collection_config_fixture: dict,
+                                     data_fixture: list[dict]):
+    # Create a collection.
+    collection_name = "test_insert_collection"
+    milvus_service_fixture.create(collection_name, **idx_part_collection_config_fixture)
+
+    # Insert data into the collection.
+    response = milvus_service_fixture.insert(collection_name, data_fixture)
+    assert response["insert_count"] == len(data_fixture)
+
+    # Retrieve inserted data by primary keys.
+    keys_to_retrieve = [2, 4, 6]
+    retrieved_data = milvus_service_fixture.retrieve_by_keys(collection_name, keys_to_retrieve)
+    assert len(retrieved_data) == len(keys_to_retrieve)
+
+    # Clean up the collection.
+    milvus_service_fixture.drop(collection_name)
+
+
+@pytest.mark.slow
+def test_search(milvus_service_fixture: MilvusVectorDBService,
+                idx_part_collection_config_fixture: dict,
+                data_fixture: list[dict]):
+    # Create a collection.
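+    # (The collection config JSON is tracked in git-lfs; it is assumed to define an int64
+    # primary-key "id", a 10-dim float_vector "embedding" and an int64 "age" field, matching
+    # the data fixture above.)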
+ collection_name = "test_search_collection" + milvus_service_fixture.create(collection_name, **idx_part_collection_config_fixture) + + # Insert data into the collection. + milvus_service_fixture.insert(collection_name, data_fixture) + + # Define a search query. + query = "age==26 or age==27" + + # Perform a search in the collection. + search_result = milvus_service_fixture.search(collection_name, query) + assert len(search_result) == 2 + assert search_result[0]["age"] in [26, 27] + assert search_result[1]["age"] in [26, 27] + + # Clean up the collection. + milvus_service_fixture.drop(collection_name) + + +@pytest.mark.slow +def test_search_with_data(milvus_service_fixture: MilvusVectorDBService, + idx_part_collection_config_fixture: dict, + data_fixture: list[dict]): + # Create a collection. + collection_name = "test_search_with_data_collection" + milvus_service_fixture.create(collection_name, **idx_part_collection_config_fixture) + + # Insert data to the collection. + milvus_service_fixture.insert(collection_name, data_fixture) + + rng = np.random.default_rng(seed=100) + search_vec = rng.random((1, 10)) + + # Define a search filter. + fltr = "age==26 or age==27" + + # Perform a search in the collection. + search_result = milvus_service_fixture.search(collection_name, + data=search_vec, + filter=fltr, + output_fields=["id", "age"]) + + assert len(search_result[0]) == 2 + assert search_result[0][0]["entity"]["age"] in [26, 27] + assert search_result[0][1]["entity"]["age"] in [26, 27] + assert len(search_result[0][0]["entity"].keys()) == 2 + assert sorted(list(search_result[0][0]["entity"].keys())) == ["age", "id"] + + # Clean up the collection. + milvus_service_fixture.drop(collection_name) + + +@pytest.mark.slow +def test_count(milvus_service_fixture: MilvusVectorDBService, + idx_part_collection_config_fixture: dict, + data_fixture: list[dict]): + # Create a collection. + collection_name = "test_count_collection" + milvus_service_fixture.create(collection_name, **idx_part_collection_config_fixture) + + # Insert data into the collection. + milvus_service_fixture.insert(collection_name, data_fixture) + + # Get the count of entities in the collection. + count = milvus_service_fixture.count(collection_name) + assert count == len(data_fixture) + + # Clean up the collection. + milvus_service_fixture.drop(collection_name) + + +@pytest.mark.slow +def test_overwrite_collection_on_create(milvus_service_fixture: MilvusVectorDBService, + idx_part_collection_config_fixture: dict, + data_fixture: list[dict]): + # Create a collection. + collection_name = "test_overwrite_collection" + milvus_service_fixture.create(collection_name, **idx_part_collection_config_fixture) + + # Insert data to the collection. + response1 = milvus_service_fixture.insert(collection_name, data_fixture) + assert response1["insert_count"] == len(data_fixture) + + # Create the same collection again with overwrite=True. + milvus_service_fixture.create(collection_name, overwrite=True, **idx_part_collection_config_fixture) + + # Insert different data into the collection. + data2 = [{"id": i, "embedding": [i / 10] * 10, "age": 26 + i} for i in range(10)] + + response2 = milvus_service_fixture.insert(collection_name, data2) + assert response2["insert_count"] == len(data2) + + # Retrieve the data from the collection and check if it matches the second set of data. 
+    retrieved_data = milvus_service_fixture.retrieve_by_keys(collection_name, list(range(10)))
+    for i in range(10):
+        assert retrieved_data[i]["age"] == data2[i]["age"]
+
+    # Clean up the collection.
+    milvus_service_fixture.drop(collection_name)
+
+
+@pytest.mark.slow
+def test_insert_into_partition(milvus_service_fixture: MilvusVectorDBService,
+                               idx_part_collection_config_fixture: dict,
+                               data_fixture: list[dict]):
+    # Create a collection with a partition.
+    collection_name = "test_partition_collection"
+    partition_name = idx_part_collection_config_fixture["collection_conf"]["partition_conf"]["partitions"][0]["name"]
+    milvus_service_fixture.create(collection_name, **idx_part_collection_config_fixture)
+
+    # Insert data into the specified partition.
+    response = milvus_service_fixture.insert(collection_name,
+                                             data_fixture,
+                                             collection_conf={"partition_name": partition_name})
+    assert response["insert_count"] == len(data_fixture)
+
+    # Retrieve inserted data by primary keys.
+    keys_to_retrieve = [2, 4, 6]
+    retrieved_data = milvus_service_fixture.retrieve_by_keys(collection_name,
+                                                             keys_to_retrieve,
+                                                             partition_names=[partition_name])
+    assert len(retrieved_data) == len(keys_to_retrieve)
+
+    retrieved_data_default_part = milvus_service_fixture.retrieve_by_keys(collection_name,
+                                                                          keys_to_retrieve,
+                                                                          partition_names=["_default"])
+    assert len(retrieved_data_default_part) == 0
+    assert len(retrieved_data_default_part) != len(keys_to_retrieve)
+
+    # Raises an error if resource is 'partition' and no partition name is passed.
+    with pytest.raises(ValueError, match="Mandatory argument 'partition_name' is required when resource='partition'"):
+        milvus_service_fixture.drop(name=collection_name, resource="partition")
+
+    # Clean up the partition
+    milvus_service_fixture.drop(name=collection_name, resource="partition", partition_name=partition_name)
+
+    # Raises an error if resource is 'index' and no field name and index name are passed.
+    with pytest.raises(ValueError,
+                       match="Mandatory arguments 'field_name' and 'index_name' are required when resource='index'"):
+        milvus_service_fixture.drop(name=collection_name, resource="index")
+
+    milvus_service_fixture.drop(name=collection_name,
+                                resource="index",
+                                field_name="embedding",
+                                index_name="_default_idx_")
+
+    retrieved_data_after_part_drop = milvus_service_fixture.retrieve_by_keys(collection_name, keys_to_retrieve)
+    assert len(retrieved_data_after_part_drop) == 0
+
+    # Clean up the collection.
+    milvus_service_fixture.drop(collection_name)
+
+
+@pytest.mark.slow
+def test_update(milvus_service_fixture: MilvusVectorDBService,
+                simple_collection_config_fixture: dict,
+                data_fixture: list[dict]):
+    collection_name = "test_update_collection"
+
+    # Create a collection with the specified schema configuration.
+    milvus_service_fixture.create(collection_name, **simple_collection_config_fixture)
+
+    # Insert data into the collection.
+    milvus_service_fixture.insert(collection_name, data_fixture)
+
+    # Use updated data to test the update/upsert functionality.
+    updated_data = [{
+        "type": MILVUS_DATA_TYPE_MAP["int64"], "name": "id", "values": list(range(5, 12))
+    },
+                    {
+                        "type": MILVUS_DATA_TYPE_MAP["float_vector"],
+                        "name": "embedding",
+                        "values": [[i / 5.0] * 10 for i in range(5, 12)]
+                    }, {
+                        "type": MILVUS_DATA_TYPE_MAP["int64"], "name": "age", "values": [25 + i for i in range(5, 12)]
+                    }]
+
+    # Apply update/upsert on updated_data.
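+    # ids 5-11 are upserted here: ids 5-9 replace existing rows while ids 10-11 are new,
+    # so all 7 entries are expected in the counts asserted below.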
+ result_dict = milvus_service_fixture.update(collection_name, updated_data) + + assert result_dict["upsert_count"] == 7 + assert result_dict["insert_count"] == 7 + assert result_dict["succ_count"] == 7 + + # Clean up the collection. + milvus_service_fixture.drop(collection_name) + + +@pytest.mark.slow +def test_delete_by_keys(milvus_service_fixture: MilvusVectorDBService, + idx_part_collection_config_fixture: dict, + data_fixture: list[dict]): + # Create a collection. + collection_name = "test_delete_by_keys_collection" + milvus_service_fixture.create(collection_name, **idx_part_collection_config_fixture) + + # Insert data into the collection. + milvus_service_fixture.insert(collection_name, data_fixture) + + # Delete data by keys from the collection. + keys_to_delete = [2, 4, 6] + response = milvus_service_fixture.delete_by_keys(collection_name, keys_to_delete) + assert response == keys_to_delete + + # Clean up the collection. + milvus_service_fixture.drop(collection_name) + + +@pytest.mark.slow +def test_delete(milvus_service_fixture: MilvusVectorDBService, + idx_part_collection_config_fixture: dict, + data_fixture: list[dict]): + # Create a collection. + collection_name = "test_delete_collection" + milvus_service_fixture.create(collection_name, **idx_part_collection_config_fixture) + + # Insert data into the collection. + milvus_service_fixture.insert(collection_name, data_fixture) + + # Delete expression. + delete_expr = "id in [0,1]" + + # Delete data from the collection using the expression. + delete_response = milvus_service_fixture.delete(collection_name, delete_expr) + assert delete_response["delete_count"] == 2 + + response = milvus_service_fixture.search(collection_name, query="id > 0") + assert len(response) == len(data_fixture) - 2 + + for item in response: + assert item["id"] > 1 + + # Clean up the collection. + milvus_service_fixture.drop(collection_name) + + +@pytest.mark.slow +def test_single_instance_with_collection_lock(milvus_service_fixture: MilvusVectorDBService, + idx_part_collection_config_fixture: dict, + data_fixture: list[dict]): + + # Create a collection. 
+ collection_name = "test_insert_and_search_order_with_collection_lock" + milvus_service_fixture.create(collection_name, **idx_part_collection_config_fixture) + + filter_query = "age == 26 or age == 27" + search_vec = np.random.random((1, 10)) + execution_order = [] + + def insert_data(): + result = milvus_service_fixture.insert(collection_name, data_fixture) + assert result['insert_count'] == len(data_fixture) + execution_order.append("Insert Executed") + + def search_data(): + result = milvus_service_fixture.search(collection_name, data=search_vec, filter=filter_query) + execution_order.append("Search Executed") + assert isinstance(result, list) + + def count_entities(): + milvus_service_fixture.count(collection_name) + execution_order.append("Count Collection Entities Executed") + + with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor: + executor.submit(insert_data) + executor.submit(search_data) + executor.submit(count_entities) + + # Assert the execution order + assert execution_order == ["Count Collection Entities Executed", "Insert Executed", "Search Executed"] + + +@pytest.mark.slow +def test_multi_instance_with_collection_lock(milvus_service_fixture: MilvusVectorDBService, + idx_part_collection_config_fixture: dict, + data_fixture: list[dict], + milvus_server_uri: str): + + milvus_service_2 = MilvusVectorDBService(uri=milvus_server_uri) + + collection_name = "test_insert_and_search_order_with_collection_lock" + filter_query = "age == 26 or age == 27" + search_vec = np.random.random((1, 10)) + + execution_order = [] + + def create_collection(): + milvus_service_fixture.create(collection_name, **idx_part_collection_config_fixture) + execution_order.append("Create Executed") + + def insert_data(): + result = milvus_service_2.insert(collection_name, data_fixture) + assert result['insert_count'] == len(data_fixture) + execution_order.append("Insert Executed") + + def search_data(): + result = milvus_service_fixture.search(collection_name, data=search_vec, filter=filter_query) + execution_order.append("Search Executed") + assert isinstance(result, list) + + with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor: + executor.submit(create_collection) + executor.submit(insert_data) + executor.submit(search_data) + + # Assert the execution order + assert execution_order == ["Create Executed", "Insert Executed", "Search Executed"] + + +def test_get_collection_lock(): + collection_name = "test_collection_lock" + lock = MilvusVectorDBService.get_collection_lock(collection_name) + assert "lock" == type(lock).__name__ + assert collection_name in MilvusVectorDBService._collection_locks diff --git a/tests/test_milvus_write_to_vector_db_stage_pipe.py b/tests/test_milvus_write_to_vector_db_stage_pipe.py new file mode 100755 index 0000000000..e065e54307 --- /dev/null +++ b/tests/test_milvus_write_to_vector_db_stage_pipe.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python +# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import os +import random + +import pytest + +import cudf + +from _utils import TEST_DIRS +from morpheus.config import Config +from morpheus.messages import ControlMessage +from morpheus.modules import to_control_message # noqa: F401 # pylint: disable=unused-import +from morpheus.pipeline import LinearPipeline +from morpheus.service.milvus_vector_db_service import MilvusVectorDBService +from morpheus.stages.general.linear_modules_stage import LinearModulesStage +from morpheus.stages.input.in_memory_source_stage import InMemorySourceStage +from morpheus.stages.output.in_memory_sink_stage import InMemorySinkStage +from morpheus.stages.output.write_to_vector_db import WriteToVectorDBStage +from morpheus.utils.module_ids import MORPHEUS_MODULE_NAMESPACE +from morpheus.utils.module_ids import TO_CONTROL_MESSAGE + + +@pytest.fixture(scope="function", name="milvus_service_fixture") +def milvus_service(milvus_server_uri: str): + service = MilvusVectorDBService(uri=milvus_server_uri) + yield service + + +def get_test_df(num_input_rows): + + df = cudf.DataFrame({ + "id": list(range(num_input_rows)), + "age": [random.randint(20, 40) for i in range(num_input_rows)], + "embedding": [[random.random() for _ in range(10)] for _ in range(num_input_rows)] + }) + + return df + + +def create_milvus_collection(collection_name: str, conf_file: str, service: MilvusVectorDBService): + + conf_filepath = os.path.join(TEST_DIRS.tests_data_dir, "service", conf_file) + + with open(conf_filepath, 'r', encoding="utf-8") as json_file: + collection_config = json.load(json_file) + + service.create(name=collection_name, overwrite=True, **collection_config) + + +@pytest.mark.slow +@pytest.mark.use_cpp +@pytest.mark.parametrize("use_instance, num_input_rows, expected_num_output_rows", [(True, 5, 5), (False, 5, 5)]) +def test_write_to_vector_db_stage_pipe(milvus_service_fixture: MilvusVectorDBService, + milvus_server_uri: str, + use_instance: bool, + config: Config, + num_input_rows: int, + expected_num_output_rows: int): + + collection_name = "test_stage_insert_collection" + + # Create milvus collection using config file. + create_milvus_collection(collection_name, "milvus_idx_part_collection_conf.json", milvus_service_fixture) + df = get_test_df(num_input_rows) + + to_cm_module_config = { + "module_id": TO_CONTROL_MESSAGE, "module_name": "to_control_message", "namespace": MORPHEUS_MODULE_NAMESPACE + } + + pipe = LinearPipeline(config) + pipe.set_source(InMemorySourceStage(config, [df])) + pipe.add_stage( + LinearModulesStage(config, + to_cm_module_config, + input_port_name="input", + output_port_name="output", + output_type=ControlMessage)) + + # Provide partition name to insert data into the partition otherwise goes to '_default' partition. + resource_kwargs = {"collection_conf": {"partition_name": "age_partition"}} + + if use_instance: + # Instantiate stage with service instance and insert options. + write_to_vdb_stage = WriteToVectorDBStage(config, + resource_name=collection_name, + service=milvus_service_fixture, + resource_kwargs=resource_kwargs) + else: + # Instantiate stage with service name, uri and insert options. 
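+        # ("milvus" is resolved by VectorDBServiceFactory, which imports
+        # morpheus.service.milvus_vector_db_service and instantiates MilvusVectorDBService,
+        # forwarding uri and any other service kwargs to its constructor.)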
+        write_to_vdb_stage = WriteToVectorDBStage(config,
+                                                  resource_name=collection_name,
+                                                  service="milvus",
+                                                  uri=milvus_server_uri,
+                                                  resource_kwargs=resource_kwargs)
+
+    pipe.add_stage(write_to_vdb_stage)
+    sink_stage = pipe.add_stage(InMemorySinkStage(config))
+    pipe.run()
+
+    messages = sink_stage.get_messages()
+
+    assert len(messages) == 1
+    assert isinstance(messages[0], ControlMessage)
+    assert messages[0].has_metadata("insert_response")
+
+    # Insert entities response as a dictionary.
+    response = messages[0].get_metadata("insert_response")
+
+    assert response["insert_count"] == expected_num_output_rows
+    assert response["succ_count"] == expected_num_output_rows
+    assert response["err_count"] == 0
diff --git a/tests/tests_data/service/milvus_idx_part_collection_conf.json b/tests/tests_data/service/milvus_idx_part_collection_conf.json
new file mode 100644
index 0000000000..4d652055eb
--- /dev/null
+++ b/tests/tests_data/service/milvus_idx_part_collection_conf.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed114c4945f46cd7748ff96c868ddc02940aa3fa4dcc3857a23225f8943fe292
+size 1057
diff --git a/tests/tests_data/service/milvus_simple_collection_conf.json b/tests/tests_data/service/milvus_simple_collection_conf.json
new file mode 100644
index 0000000000..399463cfeb
--- /dev/null
+++ b/tests/tests_data/service/milvus_simple_collection_conf.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4d5984904a207dad3a58515b1327f309542b80729a6172b02399ad67a6a83cd
+size 766

From 78c6e3aaaf58c447c1fbd825f0ab726cdcd1e99b Mon Sep 17 00:00:00 2001
From: Eli Fajardo
Date: Fri, 13 Oct 2023 17:40:31 -0400
Subject: [PATCH 59/65] Use conda environment yamls for training-tuning-scripts
 (#1256)

- Replace `requirements.txt` files (pip) with `requirements.yml` (conda environment) files.
- Update dependency versions.

Closes #1226
Closes #1227

## By Submitting this PR I confirm:
- I am familiar with the [Contributing Guidelines](https://github.com/nv-morpheus/Morpheus/blob/main/docs/source/developer_guide/contributing.md).
- When the PR is ready for review, new or existing tests cover these changes.
- When the PR is ready for review, the documentation is up to date with these changes.
Authors: - Eli Fajardo (https://github.com/efajardo-nv) Approvers: - Michael Demoret (https://github.com/mdemoret-nv) URL: https://github.com/nv-morpheus/Morpheus/pull/1256 --- .../abp-models/requirements.txt | 3 - .../abp-models/requirements.yml | 25 ++ .../dfp-models/hammah-20211017-script.py | 2 +- .../dfp-models/hammah-20211017.ipynb | 290 +++++++++--------- .../dfp-models/requirements.yml | 29 ++ .../fraud-detection-models/requirements.yml | 31 +- .../log-parsing-models/requirements.txt | 3 - .../log-parsing-models/requirements.yml | 30 ++ .../phishing-models/requirements.txt | 6 - .../phishing-models/requirements.yml | 30 ++ .../ransomware-models/requirements.txt | 1 - .../ransomware-models/requirements.yml | 24 ++ .../root-cause-models/requirements.txt | 6 - .../root-cause-models/requirements.yml | 30 ++ .../sid-models/requirements.txt | 2 - .../sid-models/requirements.yml | 30 ++ .../sid-models/sid-minibert-20230424.ipynb | 1 - 17 files changed, 360 insertions(+), 183 deletions(-) delete mode 100644 models/training-tuning-scripts/abp-models/requirements.txt create mode 100644 models/training-tuning-scripts/abp-models/requirements.yml create mode 100644 models/training-tuning-scripts/dfp-models/requirements.yml delete mode 100644 models/training-tuning-scripts/log-parsing-models/requirements.txt create mode 100644 models/training-tuning-scripts/log-parsing-models/requirements.yml delete mode 100644 models/training-tuning-scripts/phishing-models/requirements.txt create mode 100644 models/training-tuning-scripts/phishing-models/requirements.yml delete mode 100644 models/training-tuning-scripts/ransomware-models/requirements.txt create mode 100644 models/training-tuning-scripts/ransomware-models/requirements.yml delete mode 100644 models/training-tuning-scripts/root-cause-models/requirements.txt create mode 100644 models/training-tuning-scripts/root-cause-models/requirements.yml delete mode 100644 models/training-tuning-scripts/sid-models/requirements.txt create mode 100644 models/training-tuning-scripts/sid-models/requirements.yml diff --git a/models/training-tuning-scripts/abp-models/requirements.txt b/models/training-tuning-scripts/abp-models/requirements.txt deleted file mode 100644 index cea0ab243f..0000000000 --- a/models/training-tuning-scripts/abp-models/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -xgboost==1.6.2 -sklearn==0.23.1 -chardet==5.1.0 \ No newline at end of file diff --git a/models/training-tuning-scripts/abp-models/requirements.yml b/models/training-tuning-scripts/abp-models/requirements.yml new file mode 100644 index 0000000000..93b5b4c00d --- /dev/null +++ b/models/training-tuning-scripts/abp-models/requirements.yml @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
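+
+# A typical way to create this environment (the invocation and environment name are illustrative):
+#   conda env create --name abp-training -f requirements.yml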
+ +channels: + - rapidsai + - nvidia + - conda-forge +dependencies: + - cuml=23.06 + - jupyterlab + - python=3.10 + - scikit-learn=1.2.2 + - xgboost diff --git a/models/training-tuning-scripts/dfp-models/hammah-20211017-script.py b/models/training-tuning-scripts/dfp-models/hammah-20211017-script.py index 0d7e2cb0eb..8e77ce04d2 100644 --- a/models/training-tuning-scripts/dfp-models/hammah-20211017-script.py +++ b/models/training-tuning-scripts/dfp-models/hammah-20211017-script.py @@ -102,7 +102,7 @@ def main(): min_cats=1 # cut off for minority categories ) - model.fit(x_train, epochs=25, validation_data=x_val) + model.fit(x_train, epochs=25, validation_data=x_val, run_validation=True) torch.save(model.state_dict(), args.trainingdata[:-4] + ".pkl") with open(args.trainingdata[:-4] + 'dill' + '.pkl', 'wb') as f: diff --git a/models/training-tuning-scripts/dfp-models/hammah-20211017.ipynb b/models/training-tuning-scripts/dfp-models/hammah-20211017.ipynb index b8fecafd61..832a306d62 100644 --- a/models/training-tuning-scripts/dfp-models/hammah-20211017.ipynb +++ b/models/training-tuning-scripts/dfp-models/hammah-20211017.ipynb @@ -62,7 +62,7 @@ "import torch\n", "\n", "from morpheus.models.dfencoder import AutoEncoder\n", - "from morpheus.utils.seed import manual_seed" + "from morpheus.utils.seed import manual_seed\n" ] }, { @@ -81,7 +81,7 @@ "source": [ "TRAINING_DATA_PATH = \"../../datasets/training-data/dfp-cloudtrail-user123-training-data.csv\"\n", "VAL_DATA_PATH = \"../../datasets/validation-data/dfp-cloudtrail-user123-validation-data-input.csv\"\n", - "OUTPUT_MODEL_NAME = \"hammah-user123-20211017.pkl\"" + "OUTPUT_MODEL_NAME = \"hammah-user123-20211017.pkl\"\n" ] }, { @@ -94,7 +94,7 @@ "\n", "# TRAINING_DATA_PATH = \"../datasets/training-data/hammah-role-g-training-data.csv\"\n", "# VAL_DATA_PATH = \"../datasets/validation-data/hammah-role-g-validation-data.csv\"\n", - "# OUTPUT_MODEL_NAME = \"hammah-role-g-20211017.pkl\"" + "# OUTPUT_MODEL_NAME = \"hammah-role-g-20211017.pkl\"\n" ] }, { @@ -104,7 +104,7 @@ "outputs": [], "source": [ "X_train=pd.read_csv(TRAINING_DATA_PATH)\n", - "X_val=pd.read_csv(VAL_DATA_PATH)" + "X_val=pd.read_csv(VAL_DATA_PATH)\n" ] }, { @@ -121,7 +121,7 @@ "metadata": {}, "outputs": [], "source": [ - "features=['eventSource', 'eventName', 'sourceIPAddress', 'userAgent','userIdentitytype', 'requestParametersroleArn', 'requestParametersroleSessionName','requestParametersdurationSeconds', 'responseElementsassumedRoleUserassumedRoleId','responseElementsassumedRoleUserarn', 'apiVersion', 'userIdentityprincipalId','userIdentityarn', 'userIdentityaccountId', 'userIdentityaccessKeyId','userIdentitysessionContextsessionIssuerprincipalId', 'userIdentitysessionContextsessionIssueruserName','tlsDetailsclientProvidedHostHeader', 'requestParametersownersSetitems','requestParametersmaxResults', 'requestParametersinstancesSetitems','errorCode', 'errorMessage', 'requestParametersmaxItems','responseElementsrequestId', 'responseElementsinstancesSetitems','requestParametersgroupSetitems', 'requestParametersinstanceType','requestParametersmonitoringenabled', 'requestParametersdisableApiTermination','requestParametersebsOptimized', 'responseElementsreservationId', 'requestParametersgroupName'] #NO userIdentitysessionContextsessionIssuerarn,userIdentityuserName" + "features=['eventSource', 'eventName', 'sourceIPAddress', 'userAgent','userIdentitytype', 'requestParametersroleArn', 'requestParametersroleSessionName','requestParametersdurationSeconds', 
 {
@@ -121,7 +121,7 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "features=['eventSource', 'eventName', 'sourceIPAddress', 'userAgent','userIdentitytype', 'requestParametersroleArn', 'requestParametersroleSessionName','requestParametersdurationSeconds', 'responseElementsassumedRoleUserassumedRoleId','responseElementsassumedRoleUserarn', 'apiVersion', 'userIdentityprincipalId','userIdentityarn', 'userIdentityaccountId', 'userIdentityaccessKeyId','userIdentitysessionContextsessionIssuerprincipalId', 'userIdentitysessionContextsessionIssueruserName','tlsDetailsclientProvidedHostHeader', 'requestParametersownersSetitems','requestParametersmaxResults', 'requestParametersinstancesSetitems','errorCode', 'errorMessage', 'requestParametersmaxItems','responseElementsrequestId', 'responseElementsinstancesSetitems','requestParametersgroupSetitems', 'requestParametersinstanceType','requestParametersmonitoringenabled', 'requestParametersdisableApiTermination','requestParametersebsOptimized', 'responseElementsreservationId', 'requestParametersgroupName'] #NO userIdentitysessionContextsessionIssuerarn,userIdentityuserName"
+    "features=['eventSource', 'eventName', 'sourceIPAddress', 'userAgent','userIdentitytype', 'requestParametersroleArn', 'requestParametersroleSessionName','requestParametersdurationSeconds', 'responseElementsassumedRoleUserassumedRoleId','responseElementsassumedRoleUserarn', 'apiVersion', 'userIdentityprincipalId','userIdentityarn', 'userIdentityaccountId', 'userIdentityaccessKeyId','userIdentitysessionContextsessionIssuerprincipalId', 'userIdentitysessionContextsessionIssueruserName','tlsDetailsclientProvidedHostHeader', 'requestParametersownersSetitems','requestParametersmaxResults', 'requestParametersinstancesSetitems','errorCode', 'errorMessage', 'requestParametersmaxItems','responseElementsrequestId', 'responseElementsinstancesSetitems','requestParametersgroupSetitems', 'requestParametersinstanceType','requestParametersmonitoringenabled', 'requestParametersdisableApiTermination','requestParametersebsOptimized', 'responseElementsreservationId', 'requestParametersgroupName'] #NO userIdentitysessionContextsessionIssuerarn,userIdentityuserName\n"
   ]
  },
  {
@@ -135,7 +135,7 @@
    " X_train=X_train.drop(i,axis=1)\n",
    "for i in list(X_val):\n",
    " if i not in features:\n",
-    " X_val=X_val.drop(i,axis=1)"
+    " X_val=X_val.drop(i,axis=1)\n"
   ]
  },
  {
@@ -177,7 +177,7 @@
   }
  ],
  "source": [
-    "X_train.dtypes"
+    "X_train.dtypes\n"
  ]
 },
 {
@@ -195,7 +195,7 @@
  "outputs": [],
  "source": [
   "X_train = X_train.dropna(axis=1, how='all')\n",
-    "X_val = X_val.dropna(axis=1, how='all')"
+    "X_val = X_val.dropna(axis=1, how='all')\n"
  ]
 },
 {
@@ -206,7 +206,7 @@
  "source": [
   "for i in list(X_val):\n",
   " if i not in list(X_train):\n",
-    " X_val=X_val.drop([i],axis=1)"
+    " X_val=X_val.drop([i],axis=1)\n"
  ]
 },
 {
@@ -217,7 +217,7 @@
  "source": [
   "for i in list(X_train):\n",
   " if i not in list(X_val):\n",
-    " X_train=X_train.drop([i],axis=1)"
+    " X_train=X_train.drop([i],axis=1)\n"
  ]
 },
 {
@@ -244,16 +244,16 @@
   " encoder_layers = [512, 500], #layers of the encoding part\n",
   " decoder_layers = [512], #layers of the decoding part\n",
   " activation='relu', #activation function\n",
-    " swap_p=0.2, #noise parameter\n",
-    " lr = 0.01, # learning rate\n",
-    " lr_decay=.99, # learning decay\n",
+    " swap_probability=0.2, #noise parameter\n",
+    " learning_rate = 0.01, # learning rate\n",
+    " learning_rate_decay=.99, # learning decay\n",
   " batch_size=512,\n",
-    " logger='ipynb', \n",
+    " logger='ipynb',\n",
   " verbose=False,\n",
   " optimizer='sgd', #SGD optimizer is selected(Stochastic gradient descent)\n",
   " scaler='gauss_rank', #feature scaling method\n",
   " min_cats=1 #cut off for minority categories\n",
-    ")"
+    ")\n"
  ]
 },
 {
@@ -281,19 +281,17 @@
  "outputs": [
   {
    "data": {
-     "image/png": "<base64-encoded PNG of the training-loss plot (Matplotlib 3.5.1) elided>",
+     "image/png": "<base64-encoded PNG of the regenerated training-loss plot (Matplotlib 3.8.0) elided>",
     "text/plain": [
-      "<Figure (text repr elided)>"
+      "<Figure (text repr elided)>
" ] }, - "metadata": { - "needs_background": "light" - }, + "metadata": {}, "output_type": "display_data" } ], "source": [ - "model.fit(X_train, epochs=25, val_data=X_val, run_validation=True)" + "model.fit(X_train, epochs=25, validation_data=X_val, run_validation=True)\n" ] }, { @@ -310,7 +308,7 @@ "metadata": {}, "outputs": [], "source": [ - "torch.save(model.state_dict(),OUTPUT_MODEL_NAME)" + "torch.save(model.state_dict(),OUTPUT_MODEL_NAME)\n" ] }, { @@ -321,7 +319,7 @@ "source": [ "with open(OUTPUT_MODEL_NAME[:-4]+'dill'+'.pkl', 'wb') as f:\n", " serialized_model = dill.dumps(model)\n", - " f.write(serialized_model)" + " f.write(serialized_model)\n" ] }, { @@ -339,7 +337,7 @@ "outputs": [], "source": [ "scores = model.get_anomaly_score(X_val)\n", - "X_val['anomaly_score'] = scores" + "X_val['anomaly_score'] = scores\n" ] }, { @@ -404,7 +402,7 @@ " success\n", " NaN\n", " user123\n", - " 1.670993\n", + " 1.665046\n", " \n", " \n", " 318\n", @@ -422,7 +420,7 @@ " success\n", " NaN\n", " user123\n", - " 1.610524\n", + " 1.602896\n", " \n", " \n", " 315\n", @@ -440,7 +438,7 @@ " UnsupportedOperation\n", " The specified subnet belongs to a different ac...\n", " user123\n", - " 1.506423\n", + " 1.505153\n", " \n", " \n", " 316\n", @@ -458,7 +456,7 @@ " MissingAction\n", " The input fails to satisfy the constraints spe...\n", " user123\n", - " 1.455665\n", + " 1.448848\n", " \n", " \n", " 317\n", @@ -476,7 +474,7 @@ " MissingAction\n", " The input fails to satisfy the constraints spe...\n", " user123\n", - " 1.455665\n", + " 1.448848\n", " \n", " \n", " 321\n", @@ -494,15 +492,15 @@ " MissingAction\n", " The input fails to satisfy the constraints spe...\n", " user123\n", - " 1.455665\n", + " 1.448848\n", " \n", " \n", - " 741\n", + " 544\n", " Account-123456789\n", " anderson.net\n", " ListReadinessChecks\n", - " 72.5.160.159\n", - " Mozilla/5.0 (compatible; MSIE 8.0; Windows 98;...\n", + " 91.115.171.224\n", + " Mozilla/5.0 (iPad; CPU iPad OS 7_1_2 like Mac ...\n", " Root\n", " 1970-07-15\n", " c605fd31-b71e-4385-a9c9-1cfc259fcbe1\n", @@ -512,15 +510,15 @@ " success\n", " NaN\n", " user123\n", - " 1.150055\n", + " 1.167866\n", " \n", " \n", - " 544\n", + " 741\n", " Account-123456789\n", " anderson.net\n", " ListReadinessChecks\n", - " 91.115.171.224\n", - " Mozilla/5.0 (iPad; CPU iPad OS 7_1_2 like Mac ...\n", + " 72.5.160.159\n", + " Mozilla/5.0 (compatible; MSIE 8.0; Windows 98;...\n", " Root\n", " 1970-07-15\n", " c605fd31-b71e-4385-a9c9-1cfc259fcbe1\n", @@ -530,15 +528,15 @@ " success\n", " NaN\n", " user123\n", - " 1.146068\n", + " 1.149782\n", " \n", " \n", - " 816\n", + " 476\n", " Account-123456789\n", " anderson.net\n", " ListReadinessChecks\n", - " 72.5.160.159\n", - " Mozilla/5.0 (iPad; CPU iPad OS 4_2_1 like Mac ...\n", + " 91.115.171.224\n", + " Mozilla/5.0 (compatible; MSIE 8.0; Windows NT ...\n", " Root\n", " 1970-07-15\n", " c605fd31-b71e-4385-a9c9-1cfc259fcbe1\n", @@ -548,15 +546,15 @@ " success\n", " NaN\n", " user123\n", - " 1.134004\n", + " 1.148451\n", " \n", " \n", - " 476\n", + " 816\n", " Account-123456789\n", " anderson.net\n", " ListReadinessChecks\n", - " 91.115.171.224\n", - " Mozilla/5.0 (compatible; MSIE 8.0; Windows NT ...\n", + " 72.5.160.159\n", + " Mozilla/5.0 (iPad; CPU iPad OS 4_2_1 like Mac ...\n", " Root\n", " 1970-07-15\n", " c605fd31-b71e-4385-a9c9-1cfc259fcbe1\n", @@ -566,7 +564,7 @@ " success\n", " NaN\n", " user123\n", - " 1.126855\n", + " 1.143833\n", " \n", " \n", "\n", @@ -580,10 +578,10 @@ "316 Account-123456789 lopez-byrd.info DescribeSubnets 
\n", "317 Account-123456789 lopez-byrd.info DescribeSubnets \n", "321 Account-123456789 lopez-byrd.info DescribeSubnets \n", - "741 Account-123456789 anderson.net ListReadinessChecks \n", "544 Account-123456789 anderson.net ListReadinessChecks \n", - "816 Account-123456789 anderson.net ListReadinessChecks \n", + "741 Account-123456789 anderson.net ListReadinessChecks \n", "476 Account-123456789 anderson.net ListReadinessChecks \n", + "816 Account-123456789 anderson.net ListReadinessChecks \n", "\n", " sourceIPAddress userAgent \\\n", "314 23.19.39.232 Opera/8.23.(X11; Linux i686; da-DK) Presto/2.9... \n", @@ -592,10 +590,10 @@ "316 200.228.121.182 Mozilla/5.0 (Windows; U; Windows 98) AppleWebK... \n", "317 200.228.121.182 Mozilla/5.0 (Windows; U; Windows 98) AppleWebK... \n", "321 200.228.121.182 Mozilla/5.0 (Windows; U; Windows 98) AppleWebK... \n", - "741 72.5.160.159 Mozilla/5.0 (compatible; MSIE 8.0; Windows 98;... \n", "544 91.115.171.224 Mozilla/5.0 (iPad; CPU iPad OS 7_1_2 like Mac ... \n", - "816 72.5.160.159 Mozilla/5.0 (iPad; CPU iPad OS 4_2_1 like Mac ... \n", + "741 72.5.160.159 Mozilla/5.0 (compatible; MSIE 8.0; Windows 98;... \n", "476 91.115.171.224 Mozilla/5.0 (compatible; MSIE 8.0; Windows NT ... \n", + "816 72.5.160.159 Mozilla/5.0 (iPad; CPU iPad OS 4_2_1 like Mac ... \n", "\n", " userIdentitytype apiVersion userIdentityprincipalId \\\n", "314 Root NaN c605fd31-b71e-4385-a9c9-1cfc259fcbe1 \n", @@ -604,10 +602,10 @@ "316 Root NaN c605fd31-b71e-4385-a9c9-1cfc259fcbe1 \n", "317 Root NaN c605fd31-b71e-4385-a9c9-1cfc259fcbe1 \n", "321 Root NaN c605fd31-b71e-4385-a9c9-1cfc259fcbe1 \n", - "741 Root 1970-07-15 c605fd31-b71e-4385-a9c9-1cfc259fcbe1 \n", "544 Root 1970-07-15 c605fd31-b71e-4385-a9c9-1cfc259fcbe1 \n", - "816 Root 1970-07-15 c605fd31-b71e-4385-a9c9-1cfc259fcbe1 \n", + "741 Root 1970-07-15 c605fd31-b71e-4385-a9c9-1cfc259fcbe1 \n", "476 Root 1970-07-15 c605fd31-b71e-4385-a9c9-1cfc259fcbe1 \n", + "816 Root 1970-07-15 c605fd31-b71e-4385-a9c9-1cfc259fcbe1 \n", "\n", " userIdentityarn userIdentityaccessKeyId \\\n", "314 arn:aws:8ab33856-b311-4fa8-bab1-2a7fad0ec33d AZM8CDAQJ8EN63D9DV3H \n", @@ -616,10 +614,10 @@ "316 arn:aws:8ab33856-b311-4fa8-bab1-2a7fad0ec33d AZM8CDAQJ8EN63D9DV3H \n", "317 arn:aws:8ab33856-b311-4fa8-bab1-2a7fad0ec33d AZM8CDAQJ8EN63D9DV3H \n", "321 arn:aws:8ab33856-b311-4fa8-bab1-2a7fad0ec33d AZM8CDAQJ8EN63D9DV3H \n", - "741 arn:aws:8ab33856-b311-4fa8-bab1-2a7fad0ec33d AMV0GEBKYXVMOZWRG7PN \n", "544 arn:aws:8ab33856-b311-4fa8-bab1-2a7fad0ec33d AMV0GEBKYXVMOZWRG7PN \n", - "816 arn:aws:8ab33856-b311-4fa8-bab1-2a7fad0ec33d AMV0GEBKYXVMOZWRG7PN \n", + "741 arn:aws:8ab33856-b311-4fa8-bab1-2a7fad0ec33d AMV0GEBKYXVMOZWRG7PN \n", "476 arn:aws:8ab33856-b311-4fa8-bab1-2a7fad0ec33d AMV0GEBKYXVMOZWRG7PN \n", + "816 arn:aws:8ab33856-b311-4fa8-bab1-2a7fad0ec33d AMV0GEBKYXVMOZWRG7PN \n", "\n", " tlsDetailsclientProvidedHostHeader errorCode \\\n", "314 hooper.com success \n", @@ -628,10 +626,10 @@ "316 NaN MissingAction \n", "317 NaN MissingAction \n", "321 NaN MissingAction \n", - "741 lester.net success \n", "544 lester.net success \n", - "816 lester.net success \n", + "741 lester.net success \n", "476 lester.net success \n", + "816 lester.net success \n", "\n", " errorMessage \\\n", "314 NaN \n", @@ -640,22 +638,22 @@ "316 The input fails to satisfy the constraints spe... \n", "317 The input fails to satisfy the constraints spe... \n", "321 The input fails to satisfy the constraints spe... 
\n", - "741 NaN \n", "544 NaN \n", - "816 NaN \n", + "741 NaN \n", "476 NaN \n", + "816 NaN \n", "\n", " userIdentitysessionContextsessionIssueruserName anomaly_score \n", - "314 user123 1.670993 \n", - "318 user123 1.610524 \n", - "315 user123 1.506423 \n", - "316 user123 1.455665 \n", - "317 user123 1.455665 \n", - "321 user123 1.455665 \n", - "741 user123 1.150055 \n", - "544 user123 1.146068 \n", - "816 user123 1.134004 \n", - "476 user123 1.126855 " + "314 user123 1.665046 \n", + "318 user123 1.602896 \n", + "315 user123 1.505153 \n", + "316 user123 1.448848 \n", + "317 user123 1.448848 \n", + "321 user123 1.448848 \n", + "544 user123 1.167866 \n", + "741 user123 1.149782 \n", + "476 user123 1.148451 \n", + "816 user123 1.143833 " ] }, "execution_count": 16, @@ -666,7 +664,7 @@ "source": [ "#scores = model.get_anomaly_score(X_val)\n", "X_val['anomaly_score'] = scores\n", - "X_val.sort_values('anomaly_score', ascending=False).head(10)" + "X_val.sort_values('anomaly_score', ascending=False).head(10)\n" ] }, { @@ -675,7 +673,7 @@ "metadata": {}, "outputs": [], "source": [ - "X_val[\"zscore\"] = (X_val[\"anomaly_score\"] - X_val[\"anomaly_score\"].mean())/X_val[\"anomaly_score\"].std()" + "X_val[\"zscore\"] = (X_val[\"anomaly_score\"] - X_val[\"anomaly_score\"].mean())/X_val[\"anomaly_score\"].std()\n" ] }, { @@ -741,8 +739,8 @@ " success\n", " NaN\n", " user123\n", - " 1.670993\n", - " 5.653509\n", + " 1.665046\n", + " 5.557190\n", " \n", " \n", " 318\n", @@ -760,8 +758,8 @@ " success\n", " NaN\n", " user123\n", - " 1.610524\n", - " 5.386526\n", + " 1.602896\n", + " 5.286483\n", " \n", " \n", " 315\n", @@ -779,8 +777,8 @@ " UnsupportedOperation\n", " The specified subnet belongs to a different ac...\n", " user123\n", - " 1.506423\n", - " 4.926896\n", + " 1.505153\n", + " 4.860737\n", " \n", " \n", " 316\n", @@ -798,8 +796,8 @@ " MissingAction\n", " The input fails to satisfy the constraints spe...\n", " user123\n", - " 1.455665\n", - " 4.702792\n", + " 1.448848\n", + " 4.615488\n", " \n", " \n", " 317\n", @@ -817,8 +815,8 @@ " MissingAction\n", " The input fails to satisfy the constraints spe...\n", " user123\n", - " 1.455665\n", - " 4.702792\n", + " 1.448848\n", + " 4.615488\n", " \n", " \n", " 321\n", @@ -836,16 +834,16 @@ " MissingAction\n", " The input fails to satisfy the constraints spe...\n", " user123\n", - " 1.455665\n", - " 4.702792\n", + " 1.448848\n", + " 4.615488\n", " \n", " \n", - " 741\n", + " 544\n", " Account-123456789\n", " anderson.net\n", " ListReadinessChecks\n", - " 72.5.160.159\n", - " Mozilla/5.0 (compatible; MSIE 8.0; Windows 98;...\n", + " 91.115.171.224\n", + " Mozilla/5.0 (iPad; CPU iPad OS 7_1_2 like Mac ...\n", " Root\n", " 1970-07-15\n", " c605fd31-b71e-4385-a9c9-1cfc259fcbe1\n", @@ -855,16 +853,16 @@ " success\n", " NaN\n", " user123\n", - " 1.150055\n", - " 3.353450\n", + " 1.167866\n", + " 3.391602\n", " \n", " \n", - " 544\n", + " 741\n", " Account-123456789\n", " anderson.net\n", " ListReadinessChecks\n", - " 91.115.171.224\n", - " Mozilla/5.0 (iPad; CPU iPad OS 7_1_2 like Mac ...\n", + " 72.5.160.159\n", + " Mozilla/5.0 (compatible; MSIE 8.0; Windows 98;...\n", " Root\n", " 1970-07-15\n", " c605fd31-b71e-4385-a9c9-1cfc259fcbe1\n", @@ -874,16 +872,16 @@ " success\n", " NaN\n", " user123\n", - " 1.146068\n", - " 3.335850\n", + " 1.149782\n", + " 3.312836\n", " \n", " \n", - " 816\n", + " 476\n", " Account-123456789\n", " anderson.net\n", " ListReadinessChecks\n", - " 72.5.160.159\n", - " Mozilla/5.0 (iPad; CPU iPad OS 4_2_1 like Mac ...\n", + " 
91.115.171.224\n", + " Mozilla/5.0 (compatible; MSIE 8.0; Windows NT ...\n", " Root\n", " 1970-07-15\n", " c605fd31-b71e-4385-a9c9-1cfc259fcbe1\n", @@ -893,16 +891,16 @@ " success\n", " NaN\n", " user123\n", - " 1.134004\n", - " 3.282584\n", + " 1.148451\n", + " 3.307036\n", " \n", " \n", - " 476\n", + " 816\n", " Account-123456789\n", " anderson.net\n", " ListReadinessChecks\n", - " 91.115.171.224\n", - " Mozilla/5.0 (compatible; MSIE 8.0; Windows NT ...\n", + " 72.5.160.159\n", + " Mozilla/5.0 (iPad; CPU iPad OS 4_2_1 like Mac ...\n", " Root\n", " 1970-07-15\n", " c605fd31-b71e-4385-a9c9-1cfc259fcbe1\n", @@ -912,8 +910,8 @@ " success\n", " NaN\n", " user123\n", - " 1.126855\n", - " 3.251018\n", + " 1.143833\n", + " 3.286921\n", " \n", " \n", "\n", @@ -927,10 +925,10 @@ "316 Account-123456789 lopez-byrd.info DescribeSubnets \n", "317 Account-123456789 lopez-byrd.info DescribeSubnets \n", "321 Account-123456789 lopez-byrd.info DescribeSubnets \n", - "741 Account-123456789 anderson.net ListReadinessChecks \n", "544 Account-123456789 anderson.net ListReadinessChecks \n", - "816 Account-123456789 anderson.net ListReadinessChecks \n", + "741 Account-123456789 anderson.net ListReadinessChecks \n", "476 Account-123456789 anderson.net ListReadinessChecks \n", + "816 Account-123456789 anderson.net ListReadinessChecks \n", "\n", " sourceIPAddress userAgent \\\n", "314 23.19.39.232 Opera/8.23.(X11; Linux i686; da-DK) Presto/2.9... \n", @@ -939,10 +937,10 @@ "316 200.228.121.182 Mozilla/5.0 (Windows; U; Windows 98) AppleWebK... \n", "317 200.228.121.182 Mozilla/5.0 (Windows; U; Windows 98) AppleWebK... \n", "321 200.228.121.182 Mozilla/5.0 (Windows; U; Windows 98) AppleWebK... \n", - "741 72.5.160.159 Mozilla/5.0 (compatible; MSIE 8.0; Windows 98;... \n", "544 91.115.171.224 Mozilla/5.0 (iPad; CPU iPad OS 7_1_2 like Mac ... \n", - "816 72.5.160.159 Mozilla/5.0 (iPad; CPU iPad OS 4_2_1 like Mac ... \n", + "741 72.5.160.159 Mozilla/5.0 (compatible; MSIE 8.0; Windows 98;... \n", "476 91.115.171.224 Mozilla/5.0 (compatible; MSIE 8.0; Windows NT ... \n", + "816 72.5.160.159 Mozilla/5.0 (iPad; CPU iPad OS 4_2_1 like Mac ... 
\n", "\n", " userIdentitytype apiVersion userIdentityprincipalId \\\n", "314 Root NaN c605fd31-b71e-4385-a9c9-1cfc259fcbe1 \n", @@ -951,10 +949,10 @@ "316 Root NaN c605fd31-b71e-4385-a9c9-1cfc259fcbe1 \n", "317 Root NaN c605fd31-b71e-4385-a9c9-1cfc259fcbe1 \n", "321 Root NaN c605fd31-b71e-4385-a9c9-1cfc259fcbe1 \n", - "741 Root 1970-07-15 c605fd31-b71e-4385-a9c9-1cfc259fcbe1 \n", "544 Root 1970-07-15 c605fd31-b71e-4385-a9c9-1cfc259fcbe1 \n", - "816 Root 1970-07-15 c605fd31-b71e-4385-a9c9-1cfc259fcbe1 \n", + "741 Root 1970-07-15 c605fd31-b71e-4385-a9c9-1cfc259fcbe1 \n", "476 Root 1970-07-15 c605fd31-b71e-4385-a9c9-1cfc259fcbe1 \n", + "816 Root 1970-07-15 c605fd31-b71e-4385-a9c9-1cfc259fcbe1 \n", "\n", " userIdentityarn userIdentityaccessKeyId \\\n", "314 arn:aws:8ab33856-b311-4fa8-bab1-2a7fad0ec33d AZM8CDAQJ8EN63D9DV3H \n", @@ -963,10 +961,10 @@ "316 arn:aws:8ab33856-b311-4fa8-bab1-2a7fad0ec33d AZM8CDAQJ8EN63D9DV3H \n", "317 arn:aws:8ab33856-b311-4fa8-bab1-2a7fad0ec33d AZM8CDAQJ8EN63D9DV3H \n", "321 arn:aws:8ab33856-b311-4fa8-bab1-2a7fad0ec33d AZM8CDAQJ8EN63D9DV3H \n", - "741 arn:aws:8ab33856-b311-4fa8-bab1-2a7fad0ec33d AMV0GEBKYXVMOZWRG7PN \n", "544 arn:aws:8ab33856-b311-4fa8-bab1-2a7fad0ec33d AMV0GEBKYXVMOZWRG7PN \n", - "816 arn:aws:8ab33856-b311-4fa8-bab1-2a7fad0ec33d AMV0GEBKYXVMOZWRG7PN \n", + "741 arn:aws:8ab33856-b311-4fa8-bab1-2a7fad0ec33d AMV0GEBKYXVMOZWRG7PN \n", "476 arn:aws:8ab33856-b311-4fa8-bab1-2a7fad0ec33d AMV0GEBKYXVMOZWRG7PN \n", + "816 arn:aws:8ab33856-b311-4fa8-bab1-2a7fad0ec33d AMV0GEBKYXVMOZWRG7PN \n", "\n", " tlsDetailsclientProvidedHostHeader errorCode \\\n", "314 hooper.com success \n", @@ -975,10 +973,10 @@ "316 NaN MissingAction \n", "317 NaN MissingAction \n", "321 NaN MissingAction \n", - "741 lester.net success \n", "544 lester.net success \n", - "816 lester.net success \n", + "741 lester.net success \n", "476 lester.net success \n", + "816 lester.net success \n", "\n", " errorMessage \\\n", "314 NaN \n", @@ -987,22 +985,22 @@ "316 The input fails to satisfy the constraints spe... \n", "317 The input fails to satisfy the constraints spe... \n", "321 The input fails to satisfy the constraints spe... 
\n", - "741 NaN \n", "544 NaN \n", - "816 NaN \n", + "741 NaN \n", "476 NaN \n", + "816 NaN \n", "\n", " userIdentitysessionContextsessionIssueruserName anomaly_score zscore \n", - "314 user123 1.670993 5.653509 \n", - "318 user123 1.610524 5.386526 \n", - "315 user123 1.506423 4.926896 \n", - "316 user123 1.455665 4.702792 \n", - "317 user123 1.455665 4.702792 \n", - "321 user123 1.455665 4.702792 \n", - "741 user123 1.150055 3.353450 \n", - "544 user123 1.146068 3.335850 \n", - "816 user123 1.134004 3.282584 \n", - "476 user123 1.126855 3.251018 " + "314 user123 1.665046 5.557190 \n", + "318 user123 1.602896 5.286483 \n", + "315 user123 1.505153 4.860737 \n", + "316 user123 1.448848 4.615488 \n", + "317 user123 1.448848 4.615488 \n", + "321 user123 1.448848 4.615488 \n", + "544 user123 1.167866 3.391602 \n", + "741 user123 1.149782 3.312836 \n", + "476 user123 1.148451 3.307036 \n", + "816 user123 1.143833 3.286921 " ] }, "execution_count": 18, @@ -1011,7 +1009,7 @@ } ], "source": [ - "X_val.sort_values('zscore', ascending=False).head(10)" + "X_val.sort_values('zscore', ascending=False).head(10)\n" ] }, { @@ -1083,8 +1081,8 @@ " success\n", " NaN\n", " user123\n", - " 1.670993\n", - " 5.653509\n", + " 1.665046\n", + " 5.557190\n", " \n", " \n", " 315\n", @@ -1102,8 +1100,8 @@ " UnsupportedOperation\n", " The specified subnet belongs to a different ac...\n", " user123\n", - " 1.506423\n", - " 4.926896\n", + " 1.505153\n", + " 4.860737\n", " \n", " \n", " 316\n", @@ -1121,8 +1119,8 @@ " MissingAction\n", " The input fails to satisfy the constraints spe...\n", " user123\n", - " 1.455665\n", - " 4.702792\n", + " 1.448848\n", + " 4.615488\n", " \n", " \n", " 317\n", @@ -1140,8 +1138,8 @@ " MissingAction\n", " The input fails to satisfy the constraints spe...\n", " user123\n", - " 1.455665\n", - " 4.702792\n", + " 1.448848\n", + " 4.615488\n", " \n", " \n", " 318\n", @@ -1159,8 +1157,8 @@ " success\n", " NaN\n", " user123\n", - " 1.610524\n", - " 5.386526\n", + " 1.602896\n", + " 5.286483\n", " \n", " \n", " 321\n", @@ -1178,8 +1176,8 @@ " MissingAction\n", " The input fails to satisfy the constraints spe...\n", " user123\n", - " 1.455665\n", - " 4.702792\n", + " 1.448848\n", + " 4.615488\n", " \n", " \n", "\n", @@ -1235,12 +1233,12 @@ "321 The input fails to satisfy the constraints spe... 
\n", "\n", " userIdentitysessionContextsessionIssueruserName anomaly_score zscore \n", - "314 user123 1.670993 5.653509 \n", - "315 user123 1.506423 4.926896 \n", - "316 user123 1.455665 4.702792 \n", - "317 user123 1.455665 4.702792 \n", - "318 user123 1.610524 5.386526 \n", - "321 user123 1.455665 4.702792 " + "314 user123 1.665046 5.557190 \n", + "315 user123 1.505153 4.860737 \n", + "316 user123 1.448848 4.615488 \n", + "317 user123 1.448848 4.615488 \n", + "318 user123 1.602896 5.286483 \n", + "321 user123 1.448848 4.615488 " ] }, "execution_count": 19, @@ -1249,7 +1247,7 @@ } ], "source": [ - "X_val[X_val['zscore']>4]" + "X_val[X_val['zscore']>4]\n" ] }, { @@ -1275,7 +1273,7 @@ "version": "0.3.2" }, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1289,7 +1287,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.10.12" } }, "nbformat": 4, diff --git a/models/training-tuning-scripts/dfp-models/requirements.yml b/models/training-tuning-scripts/dfp-models/requirements.yml new file mode 100644 index 0000000000..902499da60 --- /dev/null +++ b/models/training-tuning-scripts/dfp-models/requirements.yml @@ -0,0 +1,29 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +channels: + - nvidia + - pytorch + - conda-forge +dependencies: + - dill + - jupyterlab + - matplotlib + - pandas=1.3 + - python=3.10 + - pytorch-cuda=11.8 + - pytorch=2.0.1 + - scikit-learn=1.2.2 + - seaborn diff --git a/models/training-tuning-scripts/fraud-detection-models/requirements.yml b/models/training-tuning-scripts/fraud-detection-models/requirements.yml index 7fe973ff1d..11df049834 100644 --- a/models/training-tuning-scripts/fraud-detection-models/requirements.yml +++ b/models/training-tuning-scripts/fraud-detection-models/requirements.yml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -14,17 +14,20 @@ # limitations under the License. 
channels: - - rapidsai - - nvidia - - conda-forge + - rapidsai + - nvidia + - dglteam/label/cu118 + - pytorch + - conda-forge dependencies: - - click==8.1.3 - - cuml=23.06 - - dgl==1.0.2+cu118 - - numpy==1.23.5 - - pandas==1.5.3 - - scikit_learn==1.2.2 - - torch==2.0.0+cu118 - - torchmetrics==0.11.4 - - tqdm==4.65.0 - - xgboost==1.7.1 + - click>=8 + - cuml=23.06 + - dgl + - jupyterlab + - matplotlib + - pytorch-cuda=11.8 + - pytorch=2.0.1 + - scikit-learn=1.2.2 + - tqdm=4 + - torchmetrics + - xgboost diff --git a/models/training-tuning-scripts/log-parsing-models/requirements.txt b/models/training-tuning-scripts/log-parsing-models/requirements.txt deleted file mode 100644 index 2fe1bb7bdc..0000000000 --- a/models/training-tuning-scripts/log-parsing-models/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -onnx=1.14.0 -seqeval==1.2.2 -transformers==4.22.2 diff --git a/models/training-tuning-scripts/log-parsing-models/requirements.yml b/models/training-tuning-scripts/log-parsing-models/requirements.yml new file mode 100644 index 0000000000..1132ddb083 --- /dev/null +++ b/models/training-tuning-scripts/log-parsing-models/requirements.yml @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +channels: + - rapidsai + - nvidia + - pytorch + - conda-forge +dependencies: + - cudf=23.06 + - jupyterlab + - onnx + - python=3.10 + - pytorch-cuda=11.8 + - pytorch=2.0.1 + - seqeval=1.2.2 + - tqdm=4 + - transformers diff --git a/models/training-tuning-scripts/phishing-models/requirements.txt b/models/training-tuning-scripts/phishing-models/requirements.txt deleted file mode 100644 index 31b92154d4..0000000000 --- a/models/training-tuning-scripts/phishing-models/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -numpy==1.22.4 -pandas==1.3.5 -onnx=1.14.0 -scikit_learn==1.1.3 -tqdm==4.64.1 -transformers==4.24.0 diff --git a/models/training-tuning-scripts/phishing-models/requirements.yml b/models/training-tuning-scripts/phishing-models/requirements.yml new file mode 100644 index 0000000000..d87c9ccc88 --- /dev/null +++ b/models/training-tuning-scripts/phishing-models/requirements.yml @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +channels: + - rapidsai + - nvidia + - pytorch + - conda-forge +dependencies: + - cudf=23.06 + - jupyterlab + - onnx + - python=3.10 + - pytorch-cuda=11.8 + - pytorch=2.0.1 + - scikit-learn=1.2.2 + - tqdm=4 + - transformers diff --git a/models/training-tuning-scripts/ransomware-models/requirements.txt b/models/training-tuning-scripts/ransomware-models/requirements.txt deleted file mode 100644 index dcda2fd6fc..0000000000 --- a/models/training-tuning-scripts/ransomware-models/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -seaborn==0.12.0 diff --git a/models/training-tuning-scripts/ransomware-models/requirements.yml b/models/training-tuning-scripts/ransomware-models/requirements.yml new file mode 100644 index 0000000000..f6495ecf08 --- /dev/null +++ b/models/training-tuning-scripts/ransomware-models/requirements.yml @@ -0,0 +1,24 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +channels: + - conda-forge +dependencies: + - jupyterlab + - matplotlib + - pandas=1.3 + - python=3.10 + - scikit-learn=1.2.2 + - seaborn diff --git a/models/training-tuning-scripts/root-cause-models/requirements.txt b/models/training-tuning-scripts/root-cause-models/requirements.txt deleted file mode 100644 index 63b8164060..0000000000 --- a/models/training-tuning-scripts/root-cause-models/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -numpy==1.22.4 -onnx==1.14.0 -pandas==1.3.5 -scikit_learn==1.1.3 -tqdm==4.64.1 -transformers==4.24.0 diff --git a/models/training-tuning-scripts/root-cause-models/requirements.yml b/models/training-tuning-scripts/root-cause-models/requirements.yml new file mode 100644 index 0000000000..d87c9ccc88 --- /dev/null +++ b/models/training-tuning-scripts/root-cause-models/requirements.yml @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +channels: + - rapidsai + - nvidia + - pytorch + - conda-forge +dependencies: + - cudf=23.06 + - jupyterlab + - onnx + - python=3.10 + - pytorch-cuda=11.8 + - pytorch=2.0.1 + - scikit-learn=1.2.2 + - tqdm=4 + - transformers diff --git a/models/training-tuning-scripts/sid-models/requirements.txt b/models/training-tuning-scripts/sid-models/requirements.txt deleted file mode 100644 index 816cdc442e..0000000000 --- a/models/training-tuning-scripts/sid-models/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -scikit_learn==1.1.3 -transformers==4.24.0 diff --git a/models/training-tuning-scripts/sid-models/requirements.yml b/models/training-tuning-scripts/sid-models/requirements.yml new file mode 100644 index 0000000000..d87c9ccc88 --- /dev/null +++ b/models/training-tuning-scripts/sid-models/requirements.yml @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +channels: + - rapidsai + - nvidia + - pytorch + - conda-forge +dependencies: + - cudf=23.06 + - jupyterlab + - onnx + - python=3.10 + - pytorch-cuda=11.8 + - pytorch=2.0.1 + - scikit-learn=1.2.2 + - tqdm=4 + - transformers diff --git a/models/training-tuning-scripts/sid-models/sid-minibert-20230424.ipynb b/models/training-tuning-scripts/sid-models/sid-minibert-20230424.ipynb index a5564de42b..72a94489ec 100644 --- a/models/training-tuning-scripts/sid-models/sid-minibert-20230424.ipynb +++ b/models/training-tuning-scripts/sid-models/sid-minibert-20230424.ipynb @@ -31,7 +31,6 @@ "outputs": [], "source": [ "from os import path\n", - "import s3fs\n", "import torch\n", "from torch.nn import BCEWithLogitsLoss\n", "from transformers import AutoModelForSequenceClassification, AdamW\n", From ea8579a03ca883ef04b0517da719a915ebe2c382 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 19 Oct 2023 09:43:56 -0700 Subject: [PATCH 60/65] WIP: Rename BaseStage to StageBase --- morpheus/pipeline/__init__.py | 2 +- morpheus/pipeline/base_stage.py | 10 +++++----- morpheus/pipeline/linear_pipeline.py | 2 +- morpheus/pipeline/pipeline.py | 18 +++++++++--------- morpheus/pipeline/receiver.py | 2 +- morpheus/pipeline/sender.py | 4 ++-- morpheus/pipeline/source_stage.py | 2 +- morpheus/pipeline/stage.py | 2 +- morpheus/pipeline/stage_schema.py | 4 ++-- tests/pipeline/conftest.py | 4 ++-- 10 files changed, 25 insertions(+), 25 deletions(-) diff --git a/morpheus/pipeline/__init__.py b/morpheus/pipeline/__init__.py index 6984c702b5..6f0367fc9e 100644 --- a/morpheus/pipeline/__init__.py +++ b/morpheus/pipeline/__init__.py @@ -22,7 +22,7 @@ from morpheus.pipeline.stage_schema import StageSchema from morpheus.pipeline.sender import Sender from morpheus.pipeline.receiver import Receiver -from morpheus.pipeline.base_stage import BaseStage +from morpheus.pipeline.base_stage import StageBase from morpheus.pipeline.stage import Stage from morpheus.pipeline.single_port_stage import SinglePortStage from 
morpheus.pipeline.multi_message_stage import MultiMessageStage diff --git a/morpheus/pipeline/base_stage.py b/morpheus/pipeline/base_stage.py index 4f9a08bdaa..500cbd4c1c 100644 --- a/morpheus/pipeline/base_stage.py +++ b/morpheus/pipeline/base_stage.py @@ -38,7 +38,7 @@ def _save_init_vals(func: _DecoratorType) -> _DecoratorType: sig = inspect.signature(func, follow_wrapped=True) @functools.wraps(func) - def inner(self: "BaseStage", *args, **kwargs): + def inner(self: "StageBase", *args, **kwargs): # Actually call init first. This way any super classes strings will be overridden func(self, *args, **kwargs) @@ -63,7 +63,7 @@ def inner(self: "BaseStage", *args, **kwargs): return typing.cast(_DecoratorType, inner) -class BaseStage(ABC, collections.abc.Hashable): +class StageBase(ABC, collections.abc.Hashable): """ This abstract class serves as the morpheus pipeline's base class. This class wraps a `mrc.SegmentObject` object and aids in hooking stages up together. @@ -81,7 +81,7 @@ def __init__(self, config: Config): # Save the config self._config = config - self._id = BaseStage.__ID_COUNTER.get_and_inc() + self._id = StageBase.__ID_COUNTER.get_and_inc() self._pipeline: _pipeline.Pipeline = None self._init_str: str = "" # Stores the initialization parameters used for creation. Needed for __repr__ @@ -228,7 +228,7 @@ def get_all_inputs(self) -> list[_pipeline.Sender]: return senders - def get_all_input_stages(self) -> list["BaseStage"]: + def get_all_input_stages(self) -> list["StageBase"]: """ Get all input stages to this stage. @@ -255,7 +255,7 @@ def get_all_outputs(self) -> list[_pipeline.Receiver]: return receivers - def get_all_output_stages(self) -> list["BaseStage"]: + def get_all_output_stages(self) -> list["StageBase"]: """ Get all output stages from this stage. 
diff --git a/morpheus/pipeline/linear_pipeline.py b/morpheus/pipeline/linear_pipeline.py index fc4cdefadd..4426a1ffad 100644 --- a/morpheus/pipeline/linear_pipeline.py +++ b/morpheus/pipeline/linear_pipeline.py @@ -44,7 +44,7 @@ def __init__(self, c: Config): self._next_segment_index = 0 self._increment_segment_id() - self._linear_stages: typing.List[_pipeline.BaseStage] = [] + self._linear_stages: typing.List[_pipeline.StageBase] = [] def _increment_segment_id(self): self._linear_stages = [] diff --git a/morpheus/pipeline/pipeline.py b/morpheus/pipeline/pipeline.py index 9395272385..7d26eb5209 100644 --- a/morpheus/pipeline/pipeline.py +++ b/morpheus/pipeline/pipeline.py @@ -29,7 +29,7 @@ import cudf from morpheus.config import Config -from morpheus.pipeline.base_stage import BaseStage +from morpheus.pipeline.base_stage import StageBase from morpheus.pipeline.preallocator_mixin import PreallocatorMixin from morpheus.pipeline.receiver import Receiver from morpheus.pipeline.sender import Sender @@ -39,7 +39,7 @@ logger = logging.getLogger(__name__) -StageT = typing.TypeVar("StageT", bound=BaseStage) +StageT = typing.TypeVar("StageT", bound=StageBase) class Pipeline(): @@ -131,7 +131,7 @@ def add_stage(self, stage: StageT, segment_id: str = "main") -> StageT: return stage def add_edge(self, - start: typing.Union[BaseStage, Sender], + start: typing.Union[StageBase, Sender], end: typing.Union[Stage, Receiver], segment_id: str = "main"): """ @@ -151,7 +151,7 @@ def add_edge(self, """ self._assert_not_built() - if (isinstance(start, BaseStage)): + if (isinstance(start, StageBase)): assert len(start.output_ports) > 0, \ "Cannot call `add_edge` with a stage with no output ports as the `start` parameter" assert len(start.output_ports) == 1, \ @@ -268,7 +268,7 @@ def _pre_build(self): # Finally, execute the link phase (only necessary for circular pipelines) # for s in source_and_stages: for stage in segment_graph.nodes(): - for port in typing.cast(BaseStage, stage).input_ports: + for port in typing.cast(StageBase, stage).input_ports: port.link_schema() logger.info("====Pre-Building Segment Complete!====") @@ -324,7 +324,7 @@ def inner_build(builder: mrc.Builder, segment_id: str): # Finally, execute the link phase (only necessary for circular pipelines) for stage in segment_graph.nodes(): - for port in typing.cast(BaseStage, stage).input_ports: + for port in typing.cast(StageBase, stage).input_ports: port.link_node(builder=builder) asyncio.run(self._async_start(segment_graph.nodes())) @@ -467,7 +467,7 @@ def visualize(self, filename: str = None, **graph_kwargs): start_def_port = ":e" if is_lr else ":s" end_def_port = ":w" if is_lr else ":n" - def has_ports(node: BaseStage, is_input): + def has_ports(node: StageBase, is_input): if (is_input): return len(node.input_ports) > 0 @@ -478,7 +478,7 @@ def has_ports(node: BaseStage, is_input): gv_subgraphs[segment_id] = graphviz.Digraph(f"cluster_{segment_id}") gv_subgraph = gv_subgraphs[segment_id] gv_subgraph.attr(label=segment_id) - for name, attrs in typing.cast(typing.Mapping[BaseStage, dict], + for name, attrs in typing.cast(typing.Mapping[StageBase, dict], self._segment_graphs[segment_id].nodes).items(): node_attrs = attrs.copy() @@ -517,7 +517,7 @@ def has_ports(node: BaseStage, is_input): # Build up edges for segment_id in self._segments: gv_subgraph = gv_subgraphs[segment_id] - for e, attrs in typing.cast(typing.Mapping[typing.Tuple[BaseStage, BaseStage], dict], + for e, attrs in typing.cast(typing.Mapping[typing.Tuple[StageBase, StageBase], dict], 
self._segment_graphs[segment_id].edges()).items(): # noqa: E501 edge_attrs = {} diff --git a/morpheus/pipeline/receiver.py b/morpheus/pipeline/receiver.py index f1532e2ca9..8dff1b365c 100644 --- a/morpheus/pipeline/receiver.py +++ b/morpheus/pipeline/receiver.py @@ -35,7 +35,7 @@ class Receiver(): Receiver port number. """ - def __init__(self, parent: "_pipeline.BaseStage", port_number: int): + def __init__(self, parent: "_pipeline.StageBase", port_number: int): self._parent = parent self.port_number = port_number diff --git a/morpheus/pipeline/sender.py b/morpheus/pipeline/sender.py index d9696fa81e..4d2ea49ca0 100644 --- a/morpheus/pipeline/sender.py +++ b/morpheus/pipeline/sender.py @@ -34,7 +34,7 @@ class Sender(): Sender port number. """ - def __init__(self, parent: "_pipeline.BaseStage", port_number: int): + def __init__(self, parent: "_pipeline.StageBase", port_number: int): self._parent = parent self.port_number = port_number @@ -45,7 +45,7 @@ def __init__(self, parent: "_pipeline.BaseStage", port_number: int): self._output_node: mrc.SegmentObject = None @property - def parent(self) -> "_pipeline.BaseStage": + def parent(self) -> "_pipeline.StageBase": return self._parent @property diff --git a/morpheus/pipeline/source_stage.py b/morpheus/pipeline/source_stage.py index 8ca491356a..9fff24a462 100644 --- a/morpheus/pipeline/source_stage.py +++ b/morpheus/pipeline/source_stage.py @@ -24,7 +24,7 @@ logger = logging.getLogger(__name__) -class SourceStage(_pipeline.BaseStage): +class SourceStage(_pipeline.StageBase): """ The SourceStage is mandatory for the Morpheus pipeline to run. This stage represents the start of the pipeline. All `SourceStage` object take no input but generate output. diff --git a/morpheus/pipeline/stage.py b/morpheus/pipeline/stage.py index 2ea202db22..433da02291 100644 --- a/morpheus/pipeline/stage.py +++ b/morpheus/pipeline/stage.py @@ -22,7 +22,7 @@ logger = logging.getLogger(__name__) -class Stage(_pipeline.BaseStage): +class Stage(_pipeline.StageBase): """ This class serves as the base for all pipeline stage implementations that are not source objects. 
diff --git a/morpheus/pipeline/stage_schema.py b/morpheus/pipeline/stage_schema.py index 021dc26b44..e1a6562f3f 100644 --- a/morpheus/pipeline/stage_schema.py +++ b/morpheus/pipeline/stage_schema.py @@ -16,7 +16,7 @@ import typing if typing.TYPE_CHECKING: - from .base_stage import BaseStage + from .base_stage import StageBase class PortSchema: @@ -43,7 +43,7 @@ def is_complete(self) -> bool: class StageSchema: - def __init__(self, stage: "BaseStage"): + def __init__(self, stage: "StageBase"): self._input_schemas = [] for port in stage.input_ports: input_schema = port.get_input_schema() diff --git a/tests/pipeline/conftest.py b/tests/pipeline/conftest.py index 7f21ff0eff..7a5e939760 100644 --- a/tests/pipeline/conftest.py +++ b/tests/pipeline/conftest.py @@ -22,7 +22,7 @@ from _utils.stages.split_stage import SplitStage from morpheus.config import Config from morpheus.pipeline import Pipeline -from morpheus.pipeline.base_stage import BaseStage +from morpheus.pipeline.base_stage import StageBase from morpheus.pipeline.source_stage import SourceStage from morpheus.stages.input.in_memory_source_stage import InMemorySourceStage from morpheus.stages.preprocess.deserialize_stage import DeserializeStage @@ -40,7 +40,7 @@ def in_mem_multi_source_stage_fixture(config: Config): yield InMemoryMultiSourceStage(config, data=data) -def _build_ports(config: Config, source_stage: SourceStage, stage: BaseStage): +def _build_ports(config: Config, source_stage: SourceStage, stage: StageBase): pipe = Pipeline(config) pipe.add_stage(source_stage) pipe.add_stage(stage) From 27d9ca5247f352caa4cf50ea4e2d943c81f02cde Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 19 Oct 2023 09:44:06 -0700 Subject: [PATCH 61/65] WIP: Rename BaseStage to StageBase --- morpheus/pipeline/base_stage.py | 4 ++-- morpheus/pipeline/pipeline.py | 4 ++-- morpheus/pipeline/receiver.py | 6 +++--- morpheus/pipeline/sender.py | 6 +++--- morpheus/pipeline/stage_schema.py | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/morpheus/pipeline/base_stage.py b/morpheus/pipeline/base_stage.py index 500cbd4c1c..1db05fd1db 100644 --- a/morpheus/pipeline/base_stage.py +++ b/morpheus/pipeline/base_stage.py @@ -234,7 +234,7 @@ def get_all_input_stages(self) -> list["StageBase"]: Returns ------- - list[`morpheus.pipeline.pipeline.BaseStage`] + list[`morpheus.pipeline.pipeline.StageBase`] All input stages. """ return [x.parent for x in self.get_all_inputs()] @@ -261,7 +261,7 @@ def get_all_output_stages(self) -> list["StageBase"]: Returns ------- - list[`morpheus.pipeline.pipeline.BaseStage`] + list[`morpheus.pipeline.pipeline.StageBase`] All output stages. """ return [x.parent for x in self.get_all_outputs()] diff --git a/morpheus/pipeline/pipeline.py b/morpheus/pipeline/pipeline.py index 7d26eb5209..403095d108 100644 --- a/morpheus/pipeline/pipeline.py +++ b/morpheus/pipeline/pipeline.py @@ -140,7 +140,7 @@ def add_edge(self, Parameters ---------- - start : typing.Union[BaseStage, Sender] + start : typing.Union[StageBase, Sender] The start of the edge or parent stage. end : typing.Union[Stage, Receiver] @@ -278,7 +278,7 @@ def _pre_build(self): def build(self): """ This function sequentially activates all the Morpheus pipeline stages passed by the users to execute a - pipeline. For the `Source` and all added `Stage` objects, `BaseStage.build` will be called sequentially to + pipeline. For the `Source` and all added `Stage` objects, `StageBase.build` will be called sequentially to construct the pipeline. 
Once the pipeline has been constructed, this will start the pipeline by calling `Source.start` on the source diff --git a/morpheus/pipeline/receiver.py b/morpheus/pipeline/receiver.py index 8dff1b365c..1d24a3899a 100644 --- a/morpheus/pipeline/receiver.py +++ b/morpheus/pipeline/receiver.py @@ -25,12 +25,12 @@ class Receiver(): """ - The `Receiver` object represents a downstream port on a `BaseStage` object that gets messages from a `Sender`. + The `Receiver` object represents a downstream port on a `StageBase` object that gets messages from a `Sender`. Parameters ---------- - parent : `morpheus.pipeline.pipeline.BaseStage` - Parent `BaseStage` object. + parent : `morpheus.pipeline.pipeline.StageBase` + Parent `StageBase` object. port_number : int Receiver port number. """ diff --git a/morpheus/pipeline/sender.py b/morpheus/pipeline/sender.py index 4d2ea49ca0..4981cf0177 100644 --- a/morpheus/pipeline/sender.py +++ b/morpheus/pipeline/sender.py @@ -24,12 +24,12 @@ class Sender(): """ - The `Sender` object represents a port on a `BaseStage` object that sends messages to a `Receiver`. + The `Sender` object represents a port on a `StageBase` object that sends messages to a `Receiver`. Parameters ---------- - parent : `morpheus.pipeline.pipeline.BaseStage` - Parent `BaseStage` object. + parent : `morpheus.pipeline.pipeline.StageBase` + Parent `StageBase` object. port_number : int Sender port number. """ diff --git a/morpheus/pipeline/stage_schema.py b/morpheus/pipeline/stage_schema.py index e1a6562f3f..90871306c2 100644 --- a/morpheus/pipeline/stage_schema.py +++ b/morpheus/pipeline/stage_schema.py @@ -106,7 +106,7 @@ def _complete(self): Calls `_complete` on all output port schemas. This will trigger an assertion error if any of the output port schemas do not have a type set, or have previously been completed. Users should not call this function directly, as this is called internally by the - `BaseStage` and `Receiver` classes. + `StageBase` and `Receiver` classes. 
""" for port_schema in self.output_schemas: # This locks the port schema From 7a57b825768b86d8cc6eaac39ec5de0b58210dbc Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 19 Oct 2023 09:45:14 -0700 Subject: [PATCH 62/65] WIP: Rename BaseStage to StageBase --- morpheus/pipeline/__init__.py | 2 +- morpheus/pipeline/pipeline.py | 2 +- morpheus/pipeline/{base_stage.py => stage_base.py} | 0 morpheus/pipeline/stage_schema.py | 2 +- tests/pipeline/conftest.py | 2 +- 5 files changed, 4 insertions(+), 4 deletions(-) rename morpheus/pipeline/{base_stage.py => stage_base.py} (100%) diff --git a/morpheus/pipeline/__init__.py b/morpheus/pipeline/__init__.py index 6f0367fc9e..86ffb4f7f4 100644 --- a/morpheus/pipeline/__init__.py +++ b/morpheus/pipeline/__init__.py @@ -22,7 +22,7 @@ from morpheus.pipeline.stage_schema import StageSchema from morpheus.pipeline.sender import Sender from morpheus.pipeline.receiver import Receiver -from morpheus.pipeline.base_stage import StageBase +from morpheus.pipeline.stage_base import StageBase from morpheus.pipeline.stage import Stage from morpheus.pipeline.single_port_stage import SinglePortStage from morpheus.pipeline.multi_message_stage import MultiMessageStage diff --git a/morpheus/pipeline/pipeline.py b/morpheus/pipeline/pipeline.py index 403095d108..f4dbd07d26 100644 --- a/morpheus/pipeline/pipeline.py +++ b/morpheus/pipeline/pipeline.py @@ -29,7 +29,7 @@ import cudf from morpheus.config import Config -from morpheus.pipeline.base_stage import StageBase +from morpheus.pipeline.stage_base import StageBase from morpheus.pipeline.preallocator_mixin import PreallocatorMixin from morpheus.pipeline.receiver import Receiver from morpheus.pipeline.sender import Sender diff --git a/morpheus/pipeline/base_stage.py b/morpheus/pipeline/stage_base.py similarity index 100% rename from morpheus/pipeline/base_stage.py rename to morpheus/pipeline/stage_base.py diff --git a/morpheus/pipeline/stage_schema.py b/morpheus/pipeline/stage_schema.py index 90871306c2..5beb5c5547 100644 --- a/morpheus/pipeline/stage_schema.py +++ b/morpheus/pipeline/stage_schema.py @@ -16,7 +16,7 @@ import typing if typing.TYPE_CHECKING: - from .base_stage import StageBase + from .stage_base import StageBase class PortSchema: diff --git a/tests/pipeline/conftest.py b/tests/pipeline/conftest.py index 7a5e939760..e74ecc6f91 100644 --- a/tests/pipeline/conftest.py +++ b/tests/pipeline/conftest.py @@ -22,7 +22,7 @@ from _utils.stages.split_stage import SplitStage from morpheus.config import Config from morpheus.pipeline import Pipeline -from morpheus.pipeline.base_stage import StageBase +from morpheus.pipeline.stage_base import StageBase from morpheus.pipeline.source_stage import SourceStage from morpheus.stages.input.in_memory_source_stage import InMemorySourceStage from morpheus.stages.preprocess.deserialize_stage import DeserializeStage From 55052e78e88f4cf5ca260c9a9badffafd8c1380c Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 19 Oct 2023 10:13:01 -0700 Subject: [PATCH 63/65] Update WriteToVectorDBStage --- morpheus/stages/output/write_to_vector_db.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/morpheus/stages/output/write_to_vector_db.py b/morpheus/stages/output/write_to_vector_db.py index 3fef0fcd66..22473cc05e 100644 --- a/morpheus/stages/output/write_to_vector_db.py +++ b/morpheus/stages/output/write_to_vector_db.py @@ -20,15 +20,15 @@ from morpheus.config import Config from morpheus.messages import ControlMessage +from 
morpheus.pipeline.pass_thru_type_mixin import PassThruTypeMixin from morpheus.pipeline.single_port_stage import SinglePortStage -from morpheus.pipeline.stream_pair import StreamPair from morpheus.service.vector_db_service import VectorDBService from morpheus.utils.vector_db_service_utils import VectorDBServiceFactory logger = logging.getLogger(__name__) -class WriteToVectorDBStage(SinglePortStage): +class WriteToVectorDBStage(PassThruTypeMixin, SinglePortStage): """ Writes messages to a Vector Database. @@ -100,9 +100,7 @@ def on_completed(self): # Close vector database service connection self._service.close() - def _build_single(self, builder: mrc.Builder, input_stream: StreamPair) -> StreamPair: - - stream = input_stream[0] + def _build_single(self, builder: mrc.Builder, input_node: mrc.SegmentObject) -> mrc.SegmentObject: def on_data(ctrl_msg: ControlMessage) -> ControlMessage: # Insert entries in the dataframe to vector database. @@ -116,8 +114,6 @@ def on_data(ctrl_msg: ControlMessage) -> ControlMessage: to_vector_db = builder.make_node(self.unique_name, ops.map(on_data), ops.on_completed(self.on_completed)) - builder.make_edge(stream, to_vector_db) - stream = to_vector_db + builder.make_edge(input_node, to_vector_db) - # Return input unchanged to allow passthrough - return stream, input_stream[1] + return to_vector_db From b43f3c725abf7343aca0c4f0b301ea67f7d588c2 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 19 Oct 2023 10:30:48 -0700 Subject: [PATCH 64/65] Fix import order --- morpheus/pipeline/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/morpheus/pipeline/pipeline.py b/morpheus/pipeline/pipeline.py index f4dbd07d26..ae8552fdec 100644 --- a/morpheus/pipeline/pipeline.py +++ b/morpheus/pipeline/pipeline.py @@ -29,12 +29,12 @@ import cudf from morpheus.config import Config -from morpheus.pipeline.stage_base import StageBase from morpheus.pipeline.preallocator_mixin import PreallocatorMixin from morpheus.pipeline.receiver import Receiver from morpheus.pipeline.sender import Sender from morpheus.pipeline.source_stage import SourceStage from morpheus.pipeline.stage import Stage +from morpheus.pipeline.stage_base import StageBase from morpheus.utils.type_utils import pretty_print_type_name logger = logging.getLogger(__name__) From 14ce11420befc661d8f1be2b35cc3884c3784a30 Mon Sep 17 00:00:00 2001 From: David Gardner Date: Thu, 19 Oct 2023 10:32:22 -0700 Subject: [PATCH 65/65] Fix import order --- tests/pipeline/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/pipeline/conftest.py b/tests/pipeline/conftest.py index e74ecc6f91..d46cc33ff6 100644 --- a/tests/pipeline/conftest.py +++ b/tests/pipeline/conftest.py @@ -22,8 +22,8 @@ from _utils.stages.split_stage import SplitStage from morpheus.config import Config from morpheus.pipeline import Pipeline -from morpheus.pipeline.stage_base import StageBase from morpheus.pipeline.source_stage import SourceStage +from morpheus.pipeline.stage_base import StageBase from morpheus.stages.input.in_memory_source_stage import InMemorySourceStage from morpheus.stages.preprocess.deserialize_stage import DeserializeStage
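
Reviewer note on patches 60-65: together they complete the BaseStage -> StageBase
rename and move WriteToVectorDBStage off the old StreamPair-based _build_single()
signature onto PassThruTypeMixin, where _build_single() accepts and returns a single
mrc.SegmentObject. For anyone porting a downstream stage, a minimal sketch against
the post-refactor API follows. The stage name, the on_data body, and the assumption
that PassThruTypeMixin supplies the schema pass-through (so no compute_schema()
override is needed) are illustrative and not part of this series; only the
make_node/make_edge wiring mirrors what patch 63 actually does in
write_to_vector_db.py.

import logging
import typing

import mrc
import mrc.core.operators as ops

from morpheus.config import Config
from morpheus.messages import ControlMessage
from morpheus.pipeline.pass_thru_type_mixin import PassThruTypeMixin
from morpheus.pipeline.single_port_stage import SinglePortStage

logger = logging.getLogger(__name__)


class LogControlMessageStage(PassThruTypeMixin, SinglePortStage):
    """Hypothetical stage: logs each ControlMessage and passes it through unchanged."""

    def __init__(self, config: Config):
        super().__init__(config)

    @property
    def name(self) -> str:
        return "log-control-message"

    def accepted_types(self) -> typing.Tuple:
        return (ControlMessage, )

    def supports_cpp_node(self) -> bool:
        return False

    def _build_single(self, builder: mrc.Builder, input_node: mrc.SegmentObject) -> mrc.SegmentObject:

        def on_data(msg: ControlMessage) -> ControlMessage:
            # Per-message work happens here; the message is returned unchanged.
            logger.info("Observed message: %s", msg)
            return msg

        node = builder.make_node(self.unique_name, ops.map(on_data))
        builder.make_edge(input_node, node)

        # Post-refactor contract: return the node itself rather than the old
        # (node, type) StreamPair; PassThruTypeMixin propagates the input type.
        return node

Used like any other single-port stage, e.g. pipe.add_stage(LogControlMessageStage(config))
after a source or deserialize stage; no explicit output-type bookkeeping is needed
because the mixin reports each input schema as the corresponding output schema.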