From 3af95b005313a9042e1be320c4559e0dc5332cd3 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sun, 10 Mar 2024 01:11:20 -0800 Subject: [PATCH 001/163] init --- .../pythonworker/PythonProxyClient.scala | 20 ++++---- .../pythonworker/PythonProxyServer.scala | 8 ++-- .../partitioners/Partitioner.scala | 5 +- .../architecture/worker/DataProcessor.scala | 48 ++++++++----------- .../worker/promisehandlers/StartHandler.scala | 5 +- .../common/ambermessage/DataPayload.scala | 3 +- .../ics/texera/workflow/common/Marker.scala | 6 +++ .../messaginglayer/OutputManagerSpec.scala | 10 ++-- .../worker/DataProcessorSpec.scala | 14 ++---- 9 files changed, 54 insertions(+), 65 deletions(-) create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala index d4b3fe73bbe..677ed841e00 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala @@ -1,22 +1,15 @@ package edu.uci.ics.amber.engine.architecture.pythonworker import com.twitter.util.{Await, Promise} -import edu.uci.ics.amber.engine.architecture.pythonworker.WorkerBatchInternalQueue.{ - ActorCommandElement, - ControlElement, - ControlElementV2, - DataElement -} +import edu.uci.ics.amber.engine.architecture.pythonworker.WorkerBatchInternalQueue.{ActorCommandElement, ControlElement, ControlElementV2, DataElement} import edu.uci.ics.amber.engine.common.AmberLogging import edu.uci.ics.amber.engine.common.actormessage.{ActorCommand, PythonActorMessage} import edu.uci.ics.amber.engine.common.amberexception.WorkflowRuntimeException -import edu.uci.ics.amber.engine.common.ambermessage.InvocationConvertUtils.{ - controlInvocationToV2, - returnInvocationToV2 -} +import edu.uci.ics.amber.engine.common.ambermessage.InvocationConvertUtils.{controlInvocationToV2, returnInvocationToV2} import edu.uci.ics.amber.engine.common.ambermessage.{PythonControlMessage, _} import edu.uci.ics.amber.engine.common.rpc.AsyncRPCClient.{ControlInvocation, ReturnInvocation} import edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity +import edu.uci.ics.texera.workflow.common.EndOfUpstream import edu.uci.ics.texera.workflow.common.tuple.Tuple import edu.uci.ics.texera.workflow.common.tuple.schema.Schema import org.apache.arrow.flight._ @@ -105,8 +98,11 @@ class PythonProxyClient(portNumberPromise: Promise[Int], val actorId: ActorVirtu val tuples: mutable.Queue[Tuple] = mutable.Queue(frame.map(_.asInstanceOf[Tuple]).toSeq: _*) writeArrowStream(tuples, from, isEnd = false) - case EndOfUpstream() => - writeArrowStream(mutable.Queue(), from, isEnd = true) + case MarkerFrame(frame) => + frame match { + case EndOfUpstream() => + writeArrowStream(mutable.Queue(), from, isEnd = true) + } } } diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala index 6e6137d23fb..8b36a2420a2 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala @@ -3,10 +3,7 @@ package edu.uci.ics.amber.engine.architecture.pythonworker import com.google.common.primitives.Longs import edu.uci.ics.amber.engine.architecture.messaginglayer.NetworkOutputGateway import edu.uci.ics.amber.engine.common.AmberLogging -import edu.uci.ics.amber.engine.common.ambermessage.InvocationConvertUtils.{ - controlInvocationToV1, - returnInvocationToV1 -} +import edu.uci.ics.amber.engine.common.ambermessage.InvocationConvertUtils.{controlInvocationToV1, returnInvocationToV1} import edu.uci.ics.amber.engine.common.ambermessage._ import edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity import edu.uci.ics.texera.workflow.common.tuple.Tuple @@ -20,6 +17,7 @@ import java.net.ServerSocket import java.util.concurrent.atomic.AtomicInteger import scala.collection.mutable import com.twitter.util.Promise +import edu.uci.ics.texera.workflow.common.EndOfUpstream import java.nio.charset.Charset @@ -106,7 +104,7 @@ private class AmberProducer( if (isEnd) { // EndOfUpstream assert(root.getRowCount == 0) - outputPort.sendTo(to, EndOfUpstream()) + outputPort.sendTo(to, MarkerFrame(EndOfUpstream())) } else { // normal data batches val queue = mutable.Queue[Tuple]() diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala index 08bf2aeffbd..c479d308c81 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala @@ -2,8 +2,9 @@ package edu.uci.ics.amber.engine.architecture.sendsemantics.partitioners import edu.uci.ics.amber.engine.architecture.messaginglayer.NetworkOutputGateway import edu.uci.ics.amber.engine.common.AmberConfig -import edu.uci.ics.amber.engine.common.ambermessage.{DataFrame, EndOfUpstream} +import edu.uci.ics.amber.engine.common.ambermessage.{DataFrame, MarkerFrame} import edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity +import edu.uci.ics.texera.workflow.common.EndOfUpstream import edu.uci.ics.texera.workflow.common.tuple.Tuple import scala.collection.mutable.ArrayBuffer @@ -31,7 +32,7 @@ class NetworkOutputBuffer( def noMore(): Unit = { flush() - dataOutputPort.sendTo(to, EndOfUpstream()) + dataOutputPort.sendTo(to, MarkerFrame(EndOfUpstream())) } def flush(): Unit = { diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index 7f7c7c57714..d97c200c2cf 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -7,32 +7,21 @@ import edu.uci.ics.amber.engine.architecture.controller.promisehandlers.PortComp import edu.uci.ics.amber.engine.architecture.controller.promisehandlers.WorkerExecutionCompletedHandler.WorkerExecutionCompleted import edu.uci.ics.amber.engine.architecture.controller.promisehandlers.WorkerStateUpdatedHandler.WorkerStateUpdated import edu.uci.ics.amber.engine.architecture.logreplay.ReplayLogManager -import edu.uci.ics.amber.engine.architecture.messaginglayer.{ - InputManager, - OutputManager, - WorkerTimerService -} +import edu.uci.ics.amber.engine.architecture.messaginglayer.{InputManager, OutputManager, WorkerTimerService} import edu.uci.ics.amber.engine.architecture.worker.DataProcessor.{FinalizeOperator, FinalizePort} import edu.uci.ics.amber.engine.architecture.worker.WorkflowWorker.MainThreadDelegateMessage import edu.uci.ics.amber.engine.architecture.worker.promisehandlers.PauseHandler.PauseWorker -import edu.uci.ics.amber.engine.architecture.worker.statistics.WorkerState.{ - COMPLETED, - READY, - RUNNING -} +import edu.uci.ics.amber.engine.architecture.worker.statistics.WorkerState.{COMPLETED, READY, RUNNING} import edu.uci.ics.amber.engine.architecture.worker.statistics.WorkerStatistics import edu.uci.ics.amber.engine.common.ambermessage._ import edu.uci.ics.amber.engine.common.statetransition.WorkerStateManager import edu.uci.ics.amber.engine.common.tuple.amber.{SchemaEnforceable, SpecialTupleLike, TupleLike} import edu.uci.ics.amber.engine.common.virtualidentity.util.{CONTROLLER, SELF} -import edu.uci.ics.amber.engine.common.virtualidentity.{ - ActorVirtualIdentity, - ChannelIdentity, - PhysicalOpIdentity -} +import edu.uci.ics.amber.engine.common.virtualidentity.{ActorVirtualIdentity, ChannelIdentity, PhysicalOpIdentity} import edu.uci.ics.amber.engine.common.workflow.PortIdentity import edu.uci.ics.amber.engine.common.{IOperatorExecutor, VirtualIdentityUtils} import edu.uci.ics.amber.error.ErrorUtils.{mkConsoleMessage, safely} +import edu.uci.ics.texera.workflow.common.{EndOfUpstream, Marker} import edu.uci.ics.texera.workflow.common.tuple.Tuple object DataProcessor { @@ -196,20 +185,25 @@ class DataProcessor( ) inputManager.initBatch(channelId, tuples) processInputTuple(inputManager.getNextTuple) - case EndOfUpstream() => - val channel = this.inputGateway.getChannel(channelId) - val portId = channel.getPortId + case MarkerFrame(marker) => + marker match { + case EndOfUpstream() => + val channel = this.inputGateway.getChannel(channelId) + val portId = channel.getPortId - this.inputManager.getPort(portId).channels(channelId) = true + this.inputManager.getPort(portId).channels(channelId) = true - if (inputManager.isPortCompleted(portId)) { - inputManager.initBatch(channelId, Array.empty) - processInputExhausted() - outputManager.outputIterator.appendSpecialTupleToEnd(FinalizePort(portId, input = true)) - } - if (inputManager.getAllPorts.forall(portId => inputManager.isPortCompleted(portId))) { - // assuming all the output ports finalize after all input ports are finalized. - outputManager.finalizeOutput() + if (inputManager.isPortCompleted(portId)) { + inputManager.initBatch(channelId, Array.empty) + processInputExhausted() + outputManager.outputIterator.appendSpecialTupleToEnd(FinalizePort(portId, input = true)) + } + if (inputManager.getAllPorts.forall(portId => inputManager.isPortCompleted(portId))) { + // assuming all the output ports finalize after all input ports are finalized. + outputManager.finalizeOutput() + } + case _ => + logger.error(s"unsupported marker type: $marker") } } statisticsManager.increaseDataProcessingTime(System.nanoTime() - dataProcessingStartTime) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala index 6123aaeb7cb..cb4c2feeccd 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala @@ -6,11 +6,12 @@ import edu.uci.ics.amber.engine.architecture.worker.statistics.WorkerState import edu.uci.ics.amber.engine.architecture.worker.statistics.WorkerState.{READY, RUNNING} import edu.uci.ics.amber.engine.common.SourceOperatorExecutor import edu.uci.ics.amber.engine.common.amberexception.WorkflowRuntimeException -import edu.uci.ics.amber.engine.common.ambermessage.EndOfUpstream +import edu.uci.ics.amber.engine.common.ambermessage.MarkerFrame import edu.uci.ics.amber.engine.common.rpc.AsyncRPCServer.ControlCommand import edu.uci.ics.amber.engine.common.virtualidentity.ChannelIdentity import edu.uci.ics.amber.engine.common.virtualidentity.util.SOURCE_STARTER_ACTOR import edu.uci.ics.amber.engine.common.workflow.PortIdentity +import edu.uci.ics.texera.workflow.common.EndOfUpstream object StartHandler { final case class StartWorker() extends ControlCommand[WorkerState] @@ -32,7 +33,7 @@ trait StartHandler { .setPortId(dummyInputPortId) dp.processDataPayload( ChannelIdentity(SOURCE_STARTER_ACTOR, dp.actorId, isControl = false), - EndOfUpstream() + MarkerFrame(EndOfUpstream()) ) dp.stateManager.getCurrentState } else { diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/ambermessage/DataPayload.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/ambermessage/DataPayload.scala index 6599ef0b855..64f0e6a80bd 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/ambermessage/DataPayload.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/ambermessage/DataPayload.scala @@ -1,10 +1,11 @@ package edu.uci.ics.amber.engine.common.ambermessage +import edu.uci.ics.texera.workflow.common.Marker import edu.uci.ics.texera.workflow.common.tuple.Tuple sealed trait DataPayload extends WorkflowFIFOMessagePayload {} -final case class EndOfUpstream() extends DataPayload +final case class MarkerFrame(frame: Marker) extends DataPayload final case class DataFrame(frame: Array[Tuple]) extends DataPayload { val inMemSize: Long = { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala new file mode 100644 index 00000000000..d7b87fd256a --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -0,0 +1,6 @@ +package edu.uci.ics.texera.workflow.common + +sealed trait Marker {} + +final case class EndOfUpstream() extends Marker +final case class EndOfIteration() extends Marker diff --git a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManagerSpec.scala b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManagerSpec.scala index 6461d3bc4d7..25074f0a793 100644 --- a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManagerSpec.scala +++ b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManagerSpec.scala @@ -4,13 +4,9 @@ import com.softwaremill.macwire.wire import edu.uci.ics.amber.engine.architecture.sendsemantics.partitionings.OneToOnePartitioning import edu.uci.ics.amber.engine.common.ambermessage._ import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike -import edu.uci.ics.amber.engine.common.virtualidentity.{ - ActorVirtualIdentity, - ChannelIdentity, - OperatorIdentity, - PhysicalOpIdentity -} +import edu.uci.ics.amber.engine.common.virtualidentity.{ActorVirtualIdentity, ChannelIdentity, OperatorIdentity, PhysicalOpIdentity} import edu.uci.ics.amber.engine.common.workflow.{PhysicalLink, PortIdentity} +import edu.uci.ics.texera.workflow.common.EndOfUpstream import edu.uci.ics.texera.workflow.common.tuple.schema.{AttributeType, Schema} import org.scalamock.scalatest.MockFactory import org.scalatest.flatspec.AnyFlatSpec @@ -65,7 +61,7 @@ class OutputManagerSpec extends AnyFlatSpec with MockFactory { (mockHandler.apply _).expects( mkDataMessage(fakeID, identifier, 2, DataFrame(tuples.slice(20, 21))) ) - (mockHandler.apply _).expects(mkDataMessage(fakeID, identifier, 3, EndOfUpstream())) + (mockHandler.apply _).expects(mkDataMessage(fakeID, identifier, 3, MarkerFrame(EndOfUpstream()))) } val fakeLink = PhysicalLink(physicalOpId(), mockPortId, physicalOpId(), mockPortId) val fakeReceiver = Array[ActorVirtualIdentity](fakeID) diff --git a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala index 4f48eef183a..0d0f1bbdb6f 100644 --- a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala +++ b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala @@ -6,19 +6,15 @@ import edu.uci.ics.amber.engine.architecture.worker.WorkflowWorker.MainThreadDel import edu.uci.ics.amber.engine.architecture.worker.promisehandlers.OpenOperatorHandler.OpenOperator import edu.uci.ics.amber.engine.architecture.worker.statistics.WorkerState.READY import edu.uci.ics.amber.engine.common.VirtualIdentityUtils -import edu.uci.ics.amber.engine.common.ambermessage.{DataFrame, EndOfUpstream, WorkflowFIFOMessage} +import edu.uci.ics.amber.engine.common.ambermessage.{DataFrame, MarkerFrame, WorkflowFIFOMessage} import edu.uci.ics.amber.engine.common.rpc.AsyncRPCClient import edu.uci.ics.amber.engine.common.rpc.AsyncRPCClient.ControlInvocation import edu.uci.ics.amber.engine.common.rpc.AsyncRPCServer.ControlCommand import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike import edu.uci.ics.amber.engine.common.virtualidentity.util.CONTROLLER -import edu.uci.ics.amber.engine.common.virtualidentity.{ - ActorVirtualIdentity, - ChannelIdentity, - OperatorIdentity, - PhysicalOpIdentity -} +import edu.uci.ics.amber.engine.common.virtualidentity.{ActorVirtualIdentity, ChannelIdentity, OperatorIdentity, PhysicalOpIdentity} import edu.uci.ics.amber.engine.common.workflow.PortIdentity +import edu.uci.ics.texera.workflow.common.EndOfUpstream import edu.uci.ics.texera.workflow.common.WorkflowContext.DEFAULT_WORKFLOW_ID import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple @@ -107,7 +103,7 @@ class DataProcessorSpec extends AnyFlatSpec with MockFactory with BeforeAndAfter } dp.processDataPayload( ChannelIdentity(senderWorkerId, testWorkerId, isControl = false), - EndOfUpstream() + MarkerFrame(EndOfUpstream()) ) while (dp.inputManager.hasUnfinishedInput || dp.outputManager.hasUnfinishedOutput) { dp.continueDataProcessing() @@ -161,7 +157,7 @@ class DataProcessorSpec extends AnyFlatSpec with MockFactory with BeforeAndAfter (operator.close _).expects().once() dp.processDataPayload( ChannelIdentity(senderWorkerId, testWorkerId, isControl = false), - EndOfUpstream() + MarkerFrame(EndOfUpstream()) ) while (dp.inputManager.hasUnfinishedInput || dp.outputManager.hasUnfinishedOutput) { dp.continueDataProcessing() From c5208d74b17ba6b12defddd9923e052032530e86 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sun, 10 Mar 2024 19:12:43 -0700 Subject: [PATCH 002/163] fix format --- .../ics/amber/engine/architecture/worker/DataProcessor.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index d97c200c2cf..d5efdff730a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -21,7 +21,7 @@ import edu.uci.ics.amber.engine.common.virtualidentity.{ActorVirtualIdentity, Ch import edu.uci.ics.amber.engine.common.workflow.PortIdentity import edu.uci.ics.amber.engine.common.{IOperatorExecutor, VirtualIdentityUtils} import edu.uci.ics.amber.error.ErrorUtils.{mkConsoleMessage, safely} -import edu.uci.ics.texera.workflow.common.{EndOfUpstream, Marker} +import edu.uci.ics.texera.workflow.common.EndOfUpstream import edu.uci.ics.texera.workflow.common.tuple.Tuple object DataProcessor { From 6830a06398d3a5d7c596870557a10cf1cc5261f3 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sun, 10 Mar 2024 21:43:00 -0700 Subject: [PATCH 003/163] fix format --- .../pythonworker/PythonProxyClient.scala | 12 ++++++++-- .../pythonworker/PythonProxyServer.scala | 5 ++++- .../architecture/worker/DataProcessor.scala | 22 +++++++++++++++---- .../messaginglayer/OutputManagerSpec.scala | 11 ++++++++-- .../worker/DataProcessorSpec.scala | 7 +++++- 5 files changed, 47 insertions(+), 10 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala index 677ed841e00..ae1e9f305ff 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala @@ -1,11 +1,19 @@ package edu.uci.ics.amber.engine.architecture.pythonworker import com.twitter.util.{Await, Promise} -import edu.uci.ics.amber.engine.architecture.pythonworker.WorkerBatchInternalQueue.{ActorCommandElement, ControlElement, ControlElementV2, DataElement} +import edu.uci.ics.amber.engine.architecture.pythonworker.WorkerBatchInternalQueue.{ + ActorCommandElement, + ControlElement, + ControlElementV2, + DataElement +} import edu.uci.ics.amber.engine.common.AmberLogging import edu.uci.ics.amber.engine.common.actormessage.{ActorCommand, PythonActorMessage} import edu.uci.ics.amber.engine.common.amberexception.WorkflowRuntimeException -import edu.uci.ics.amber.engine.common.ambermessage.InvocationConvertUtils.{controlInvocationToV2, returnInvocationToV2} +import edu.uci.ics.amber.engine.common.ambermessage.InvocationConvertUtils.{ + controlInvocationToV2, + returnInvocationToV2 +} import edu.uci.ics.amber.engine.common.ambermessage.{PythonControlMessage, _} import edu.uci.ics.amber.engine.common.rpc.AsyncRPCClient.{ControlInvocation, ReturnInvocation} import edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala index 8b36a2420a2..e56ef8c5f21 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala @@ -3,7 +3,10 @@ package edu.uci.ics.amber.engine.architecture.pythonworker import com.google.common.primitives.Longs import edu.uci.ics.amber.engine.architecture.messaginglayer.NetworkOutputGateway import edu.uci.ics.amber.engine.common.AmberLogging -import edu.uci.ics.amber.engine.common.ambermessage.InvocationConvertUtils.{controlInvocationToV1, returnInvocationToV1} +import edu.uci.ics.amber.engine.common.ambermessage.InvocationConvertUtils.{ + controlInvocationToV1, + returnInvocationToV1 +} import edu.uci.ics.amber.engine.common.ambermessage._ import edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity import edu.uci.ics.texera.workflow.common.tuple.Tuple diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index d5efdff730a..69fc6c79315 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -7,17 +7,29 @@ import edu.uci.ics.amber.engine.architecture.controller.promisehandlers.PortComp import edu.uci.ics.amber.engine.architecture.controller.promisehandlers.WorkerExecutionCompletedHandler.WorkerExecutionCompleted import edu.uci.ics.amber.engine.architecture.controller.promisehandlers.WorkerStateUpdatedHandler.WorkerStateUpdated import edu.uci.ics.amber.engine.architecture.logreplay.ReplayLogManager -import edu.uci.ics.amber.engine.architecture.messaginglayer.{InputManager, OutputManager, WorkerTimerService} +import edu.uci.ics.amber.engine.architecture.messaginglayer.{ + InputManager, + OutputManager, + WorkerTimerService +} import edu.uci.ics.amber.engine.architecture.worker.DataProcessor.{FinalizeOperator, FinalizePort} import edu.uci.ics.amber.engine.architecture.worker.WorkflowWorker.MainThreadDelegateMessage import edu.uci.ics.amber.engine.architecture.worker.promisehandlers.PauseHandler.PauseWorker -import edu.uci.ics.amber.engine.architecture.worker.statistics.WorkerState.{COMPLETED, READY, RUNNING} +import edu.uci.ics.amber.engine.architecture.worker.statistics.WorkerState.{ + COMPLETED, + READY, + RUNNING +} import edu.uci.ics.amber.engine.architecture.worker.statistics.WorkerStatistics import edu.uci.ics.amber.engine.common.ambermessage._ import edu.uci.ics.amber.engine.common.statetransition.WorkerStateManager import edu.uci.ics.amber.engine.common.tuple.amber.{SchemaEnforceable, SpecialTupleLike, TupleLike} import edu.uci.ics.amber.engine.common.virtualidentity.util.{CONTROLLER, SELF} -import edu.uci.ics.amber.engine.common.virtualidentity.{ActorVirtualIdentity, ChannelIdentity, PhysicalOpIdentity} +import edu.uci.ics.amber.engine.common.virtualidentity.{ + ActorVirtualIdentity, + ChannelIdentity, + PhysicalOpIdentity +} import edu.uci.ics.amber.engine.common.workflow.PortIdentity import edu.uci.ics.amber.engine.common.{IOperatorExecutor, VirtualIdentityUtils} import edu.uci.ics.amber.error.ErrorUtils.{mkConsoleMessage, safely} @@ -196,7 +208,9 @@ class DataProcessor( if (inputManager.isPortCompleted(portId)) { inputManager.initBatch(channelId, Array.empty) processInputExhausted() - outputManager.outputIterator.appendSpecialTupleToEnd(FinalizePort(portId, input = true)) + outputManager.outputIterator.appendSpecialTupleToEnd( + FinalizePort(portId, input = true) + ) } if (inputManager.getAllPorts.forall(portId => inputManager.isPortCompleted(portId))) { // assuming all the output ports finalize after all input ports are finalized. diff --git a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManagerSpec.scala b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManagerSpec.scala index 25074f0a793..05963bf97cf 100644 --- a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManagerSpec.scala +++ b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManagerSpec.scala @@ -4,7 +4,12 @@ import com.softwaremill.macwire.wire import edu.uci.ics.amber.engine.architecture.sendsemantics.partitionings.OneToOnePartitioning import edu.uci.ics.amber.engine.common.ambermessage._ import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike -import edu.uci.ics.amber.engine.common.virtualidentity.{ActorVirtualIdentity, ChannelIdentity, OperatorIdentity, PhysicalOpIdentity} +import edu.uci.ics.amber.engine.common.virtualidentity.{ + ActorVirtualIdentity, + ChannelIdentity, + OperatorIdentity, + PhysicalOpIdentity +} import edu.uci.ics.amber.engine.common.workflow.{PhysicalLink, PortIdentity} import edu.uci.ics.texera.workflow.common.EndOfUpstream import edu.uci.ics.texera.workflow.common.tuple.schema.{AttributeType, Schema} @@ -61,7 +66,9 @@ class OutputManagerSpec extends AnyFlatSpec with MockFactory { (mockHandler.apply _).expects( mkDataMessage(fakeID, identifier, 2, DataFrame(tuples.slice(20, 21))) ) - (mockHandler.apply _).expects(mkDataMessage(fakeID, identifier, 3, MarkerFrame(EndOfUpstream()))) + (mockHandler.apply _).expects( + mkDataMessage(fakeID, identifier, 3, MarkerFrame(EndOfUpstream())) + ) } val fakeLink = PhysicalLink(physicalOpId(), mockPortId, physicalOpId(), mockPortId) val fakeReceiver = Array[ActorVirtualIdentity](fakeID) diff --git a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala index 0d0f1bbdb6f..a6b4d9f8600 100644 --- a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala +++ b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala @@ -12,7 +12,12 @@ import edu.uci.ics.amber.engine.common.rpc.AsyncRPCClient.ControlInvocation import edu.uci.ics.amber.engine.common.rpc.AsyncRPCServer.ControlCommand import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike import edu.uci.ics.amber.engine.common.virtualidentity.util.CONTROLLER -import edu.uci.ics.amber.engine.common.virtualidentity.{ActorVirtualIdentity, ChannelIdentity, OperatorIdentity, PhysicalOpIdentity} +import edu.uci.ics.amber.engine.common.virtualidentity.{ + ActorVirtualIdentity, + ChannelIdentity, + OperatorIdentity, + PhysicalOpIdentity +} import edu.uci.ics.amber.engine.common.workflow.PortIdentity import edu.uci.ics.texera.workflow.common.EndOfUpstream import edu.uci.ics.texera.workflow.common.WorkflowContext.DEFAULT_WORKFLOW_ID From 8f3975bed9159a926bdf208f00de14db224c814f Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Mon, 18 Mar 2024 16:21:00 -0700 Subject: [PATCH 004/163] rename --- .../ics/amber/engine/architecture/worker/DataProcessor.scala | 5 +++-- .../edu/uci/ics/amber/engine/common/IOperatorExecutor.scala | 2 ++ .../scala/edu/uci/ics/texera/workflow/common/Marker.scala | 2 ++ .../texera/workflow/common/operators/OperatorExecutor.scala | 3 +++ 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index 764d57e9e45..5d80fef3c96 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -108,7 +108,7 @@ class DataProcessor( * process end of an input port with Executor.onFinish(). * this function is only called by the DP thread. */ - private[this] def processInputExhausted(): Unit = { + private[this] def processEndOfUpstream(): Unit = { try { outputManager.outputIterator.setTupleOutput( operator.onFinishMultiPort( @@ -159,6 +159,7 @@ class DataProcessor( ) asyncRPCClient.send(WorkerExecutionCompleted(), CONTROLLER) case FinalizePort(portId, input) => + operator.onOutputFinish(portId.id) asyncRPCClient.send(PortCompleted(portId, input), CONTROLLER) case schemaEnforceable: SchemaEnforceable => statisticsManager.increaseOutputTupleCount() @@ -207,7 +208,7 @@ class DataProcessor( if (inputManager.isPortCompleted(portId)) { inputManager.initBatch(channelId, Array.empty) - processInputExhausted() + processEndOfUpstream() outputManager.outputIterator.appendSpecialTupleToEnd( FinalizePort(portId, input = true) ) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/IOperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/IOperatorExecutor.scala index 17fd33d25bc..8f1660aa297 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/IOperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/IOperatorExecutor.scala @@ -2,6 +2,7 @@ package edu.uci.ics.amber.engine.common import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike import edu.uci.ics.amber.engine.common.workflow.PortIdentity +import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.tuple.Tuple trait IOperatorExecutor { @@ -14,4 +15,5 @@ trait IOperatorExecutor { def onFinishMultiPort(port: Int): Iterator[(TupleLike, Option[PortIdentity])] + def onOutputFinish(port: Int): Iterator[State] } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index d7b87fd256a..7156f050bdb 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -4,3 +4,5 @@ sealed trait Marker {} final case class EndOfUpstream() extends Marker final case class EndOfIteration() extends Marker + +final case class State() extends Marker diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala index f53f0285a93..28c0c8fa875 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala @@ -3,6 +3,7 @@ package edu.uci.ics.texera.workflow.common.operators import edu.uci.ics.amber.engine.common.IOperatorExecutor import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike import edu.uci.ics.amber.engine.common.workflow.PortIdentity +import edu.uci.ics.texera.workflow.common.{Marker, State} import edu.uci.ics.texera.workflow.common.tuple.Tuple trait OperatorExecutor extends IOperatorExecutor { @@ -25,4 +26,6 @@ trait OperatorExecutor extends IOperatorExecutor { } def onFinish(port: Int): Iterator[TupleLike] = Iterator.empty + override def onOutputFinish(port: Int): Iterator[State] = Iterator.empty + } From c1f48bfdcd2cff151cb893bedac39e24da202aad Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Wed, 20 Mar 2024 14:12:44 -0700 Subject: [PATCH 005/163] fix fmt --- .../texera/workflow/common/operators/OperatorExecutor.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala index e7f5564d4cd..f9a2773f42b 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala @@ -2,7 +2,7 @@ package edu.uci.ics.texera.workflow.common.operators import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike import edu.uci.ics.amber.engine.common.workflow.PortIdentity -import edu.uci.ics.texera.workflow.common.{Marker, State} +import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.tuple.Tuple trait OperatorExecutor { @@ -25,6 +25,6 @@ trait OperatorExecutor { } def onFinish(port: Int): Iterator[TupleLike] = Iterator.empty - override def onOutputFinish(port: Int): Iterator[State] = Iterator.empty + def onOutputFinish(port: Int): Iterator[State] = Iterator.empty } From 7e407980ce1b35666afb46dc3f06938812827daa Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Mon, 25 Mar 2024 14:31:57 -0700 Subject: [PATCH 006/163] update --- .../scala/edu/uci/ics/texera/workflow/common/Marker.scala | 5 +++-- .../texera/workflow/common/operators/OperatorExecutor.scala | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index 7156f050bdb..cecc2504a87 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -1,8 +1,9 @@ package edu.uci.ics.texera.workflow.common +import scala.collection.mutable + sealed trait Marker {} final case class EndOfUpstream() extends Marker final case class EndOfIteration() extends Marker - -final case class State() extends Marker +final case class State(key: String, value: Any) extends Marker diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala index f9a2773f42b..26783039ef8 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala @@ -20,6 +20,8 @@ trait OperatorExecutor { def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] + def processMarker(marker: State, port: Int): Iterator[State] = Iterator.empty + def onFinishMultiPort(port: Int): Iterator[(TupleLike, Option[PortIdentity])] = { onFinish(port).map(t => (t, None)) } From e7bab7fc87bbb989ed935ace3c57f74f9e1c7be7 Mon Sep 17 00:00:00 2001 From: Shengquan Ni Date: Wed, 3 Apr 2024 17:15:38 -0700 Subject: [PATCH 007/163] add string serialization and test program --- .../ics/amber/engine/common/SerializedState.scala | 10 ++++++++++ .../scala/edu/uci/ics/texera/web/JsonTest.scala | 15 ++++++--------- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SerializedState.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SerializedState.scala index d0f28804a5c..f38c2bcc5da 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SerializedState.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SerializedState.scala @@ -16,6 +16,16 @@ object SerializedState { val manifest = Serializers.manifestFor(ser, obj) SerializedState(bytes, ser.identifier, manifest) } + + def fromObjectToString[T <: AnyRef](obj:T):String = { + val serializedObj = fromObject(obj, AmberUtils.serde) + s"${serializedObj.manifest}amber_serialization${serializedObj.serializerId}amber_serialization" + new String(serializedObj.bytes, "UTF-8") + } + + def stringToObject(str:String):AnyRef = { + val fields = str.split("amber_serialization") + SerializedState(fields(2).getBytes("UTF-8"),fields(1).toInt,fields(0)).toObject[AnyRef](AmberUtils.serde) + } } case class SerializedState(bytes: Array[Byte], serializerId: Int, manifest: String) { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/JsonTest.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/JsonTest.scala index 64cd4247cc7..0d59927e2dc 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/JsonTest.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/JsonTest.scala @@ -1,5 +1,6 @@ package edu.uci.ics.texera.web +import edu.uci.ics.amber.engine.common.{AmberUtils, SerializedState} import edu.uci.ics.texera.Utils import edu.uci.ics.texera.web.workflowruntimestate.WorkflowAggregatedState import edu.uci.ics.texera.web.workflowruntimestate.WorkflowAggregatedState.RUNNING @@ -7,15 +8,11 @@ import edu.uci.ics.texera.web.workflowruntimestate.WorkflowAggregatedState.RUNNI object JsonTest { def main(args: Array[String]): Unit = { - val a = RUNNING - val om = Utils.objectMapper - - val str = om.writeValueAsString(a) - println(str) - - val des = om.readValue(str, classOf[WorkflowAggregatedState]) - println(des) - + AmberUtils.startActorWorker(None) + val a = Map(1 -> 2, 3 -> "1231234") + val s = SerializedState.fromObjectToString(a) + val b: AnyRef = SerializedState.stringToObject(s) + println(b) } } From 6fb5466619eafb093be6623afaf40c4cec8ddf54 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 4 Apr 2024 20:13:58 -0700 Subject: [PATCH 008/163] update --- .../ics/amber/engine/architecture/worker/DataProcessor.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index aaba5caa752..b215681d5d0 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -153,7 +153,7 @@ class DataProcessor( ) asyncRPCClient.send(WorkerExecutionCompleted(), CONTROLLER) case FinalizePort(portId, input) => - operator.onOutputFinish(portId.id) + //operator.onOutputFinish(portId.id) asyncRPCClient.send(PortCompleted(portId, input), CONTROLLER) case schemaEnforceable: SchemaEnforceable => statisticsManager.increaseOutputTupleCount() From 772fbf7de1c85ca02534f034d786478bb6c4ddc0 Mon Sep 17 00:00:00 2001 From: Shengquan Ni Date: Fri, 5 Apr 2024 16:15:01 -0700 Subject: [PATCH 009/163] update serialization --- .../amber/engine/common/SerializedState.scala | 4 ++-- .../edu/uci/ics/texera/web/JsonTest.scala | 22 +++++++++++++++---- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SerializedState.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SerializedState.scala index f38c2bcc5da..44faf54c304 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SerializedState.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SerializedState.scala @@ -19,12 +19,12 @@ object SerializedState { def fromObjectToString[T <: AnyRef](obj:T):String = { val serializedObj = fromObject(obj, AmberUtils.serde) - s"${serializedObj.manifest}amber_serialization${serializedObj.serializerId}amber_serialization" + new String(serializedObj.bytes, "UTF-8") + s"${serializedObj.manifest}amber_serialization${serializedObj.serializerId}amber_serialization" + (serializedObj.bytes.map(_.toChar)).mkString } def stringToObject(str:String):AnyRef = { val fields = str.split("amber_serialization") - SerializedState(fields(2).getBytes("UTF-8"),fields(1).toInt,fields(0)).toObject[AnyRef](AmberUtils.serde) + SerializedState(fields(2).map(_.toByte).toArray,fields(1).toInt,fields(0)).toObject[AnyRef](AmberUtils.serde) } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/JsonTest.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/JsonTest.scala index 0d59927e2dc..89d0de14178 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/JsonTest.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/JsonTest.scala @@ -5,14 +5,28 @@ import edu.uci.ics.texera.Utils import edu.uci.ics.texera.web.workflowruntimestate.WorkflowAggregatedState import edu.uci.ics.texera.web.workflowruntimestate.WorkflowAggregatedState.RUNNING +import scala.collection.mutable + object JsonTest { def main(args: Array[String]): Unit = { AmberUtils.startActorWorker(None) - val a = Map(1 -> 2, 3 -> "1231234") - val s = SerializedState.fromObjectToString(a) - val b: AnyRef = SerializedState.stringToObject(s) - println(b) + val testObjs = Array( + Map(1 -> "123", 3 -> "1231234"), + mutable.HashMap[String, Any]("name" -> "peter", "mail" -> "peter@uci.edu", "grade" -> 4.0), + Array(1,2,3, 4.0, "good", "bad", 8) + ) + testObjs.foreach{ + obj => + val strRepr = SerializedState.fromObjectToString(obj) + val objRepr = SerializedState.stringToObject(strRepr) + objRepr match { + case value: Array[_] => + println(value.mkString(",")) + case _ => + println(objRepr) + } + } } } From 289787c2e478b85393232dc60e35ee58e5e62c4b Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Fri, 12 Apr 2024 16:33:59 -0700 Subject: [PATCH 010/163] update --- .../ics/texera/workflow/common/Marker.scala | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index cecc2504a87..e946ed7a0b2 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -1,9 +1,18 @@ package edu.uci.ics.texera.workflow.common -import scala.collection.mutable +import edu.uci.ics.amber.engine.common.tuple.amber.SpecialTupleLike -sealed trait Marker {} +sealed trait Marker extends SpecialTupleLike + +final case class EndOfUpstream() extends Marker { + override def getFields: Array[Any] = Array("EndOfUpstream") +} + +final case class EndOfIteration() extends Marker { + override def getFields: Array[Any] = Array("EndOfIteration") +} + +final case class State(value: String) extends Marker { + override def getFields: Array[Any] = Array("State") +} -final case class EndOfUpstream() extends Marker -final case class EndOfIteration() extends Marker -final case class State(key: String, value: Any) extends Marker From 3904018b19dbaf75761dda2457683310d5596e80 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Tue, 16 Apr 2024 03:33:36 -0700 Subject: [PATCH 011/163] update --- .../messaginglayer/OutputManager.scala | 14 ++-- .../partitioners/Partitioner.scala | 7 +- .../architecture/worker/DataProcessor.scala | 34 +++++----- .../worker/promisehandlers/StartHandler.scala | 4 +- .../ics/texera/workflow/common/Marker.scala | 4 ++ .../workflow/common/operators/LogicalOp.scala | 29 ++------ .../common/operators/OperatorExecutor.scala | 4 +- .../workflow/operators/test/TestAOpDesc.scala | 66 +++++++++++++++++++ .../workflow/operators/test/TestAOpExec.scala | 24 +++++++ .../workflow/operators/test/TestBOpDesc.scala | 66 +++++++++++++++++++ .../workflow/operators/test/TestBOpExec.scala | 21 ++++++ .../messaginglayer/OutputManagerSpec.scala | 2 +- 12 files changed, 227 insertions(+), 48 deletions(-) create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManager.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManager.scala index 6635d429bba..b6794dd0eae 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManager.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManager.scala @@ -1,10 +1,6 @@ package edu.uci.ics.amber.engine.architecture.messaginglayer -import edu.uci.ics.amber.engine.architecture.messaginglayer.OutputManager.{ - DPOutputIterator, - getBatchSize, - toPartitioner -} +import edu.uci.ics.amber.engine.architecture.messaginglayer.OutputManager.{DPOutputIterator, getBatchSize, toPartitioner} import edu.uci.ics.amber.engine.architecture.sendsemantics.partitioners._ import edu.uci.ics.amber.engine.architecture.sendsemantics.partitionings._ import edu.uci.ics.amber.engine.architecture.worker.DataProcessor.{FinalizeExecutor, FinalizePort} @@ -13,6 +9,7 @@ import edu.uci.ics.amber.engine.common.rpc.AsyncRPCServer.ControlCommand import edu.uci.ics.amber.engine.common.tuple.amber.{SchemaEnforceable, TupleLike} import edu.uci.ics.amber.engine.common.virtualidentity.{ActorVirtualIdentity, ChannelIdentity} import edu.uci.ics.amber.engine.common.workflow.{PhysicalLink, PortIdentity} +import edu.uci.ics.texera.workflow.common.Marker import edu.uci.ics.texera.workflow.common.tuple.schema.Schema import scala.collection.mutable @@ -172,6 +169,13 @@ class OutputManager( }) } + def emitMarker(marker: Marker): Unit = { + networkOutputBuffers.foreach(kv => { + kv._2.flush() + kv._2.sendMarker(marker) + }) + } + def addPort(portId: PortIdentity, schema: Schema): Unit = { // each port can only be added and initialized once. if (this.ports.contains(portId)) { diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala index c479d308c81..a263f79c9fa 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala @@ -4,7 +4,7 @@ import edu.uci.ics.amber.engine.architecture.messaginglayer.NetworkOutputGateway import edu.uci.ics.amber.engine.common.AmberConfig import edu.uci.ics.amber.engine.common.ambermessage.{DataFrame, MarkerFrame} import edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity -import edu.uci.ics.texera.workflow.common.EndOfUpstream +import edu.uci.ics.texera.workflow.common.{EndOfUpstream, Marker} import edu.uci.ics.texera.workflow.common.tuple.Tuple import scala.collection.mutable.ArrayBuffer @@ -35,6 +35,11 @@ class NetworkOutputBuffer( dataOutputPort.sendTo(to, MarkerFrame(EndOfUpstream())) } + def sendMarker(marker: Marker): Unit = { + flush() + dataOutputPort.sendTo(to, MarkerFrame(marker)) + } + def flush(): Unit = { if (buffer.nonEmpty) { dataOutputPort.sendTo(to, DataFrame(buffer.toArray)) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index 18106d10017..cbc72892767 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -7,20 +7,12 @@ import edu.uci.ics.amber.engine.architecture.controller.promisehandlers.PortComp import edu.uci.ics.amber.engine.architecture.controller.promisehandlers.WorkerExecutionCompletedHandler.WorkerExecutionCompleted import edu.uci.ics.amber.engine.architecture.controller.promisehandlers.WorkerStateUpdatedHandler.WorkerStateUpdated import edu.uci.ics.amber.engine.architecture.logreplay.ReplayLogManager -import edu.uci.ics.amber.engine.architecture.messaginglayer.{ - InputManager, - OutputManager, - WorkerTimerService -} +import edu.uci.ics.amber.engine.architecture.messaginglayer.{InputManager, OutputManager, WorkerTimerService} import edu.uci.ics.amber.engine.architecture.worker.DataProcessor.{FinalizeExecutor, FinalizePort} import edu.uci.ics.amber.engine.architecture.worker.WorkflowWorker.MainThreadDelegateMessage import edu.uci.ics.amber.engine.architecture.worker.managers.SerializationManager import edu.uci.ics.amber.engine.architecture.worker.promisehandlers.PauseHandler.PauseWorker -import edu.uci.ics.amber.engine.architecture.worker.statistics.WorkerState.{ - COMPLETED, - READY, - RUNNING -} +import edu.uci.ics.amber.engine.architecture.worker.statistics.WorkerState.{COMPLETED, READY, RUNNING} import edu.uci.ics.amber.engine.architecture.worker.statistics.WorkerStatistics import edu.uci.ics.amber.engine.common.ambermessage._ import edu.uci.ics.amber.engine.common.statetransition.WorkerStateManager @@ -29,7 +21,7 @@ import edu.uci.ics.amber.engine.common.virtualidentity.util.{CONTROLLER, SELF} import edu.uci.ics.amber.engine.common.virtualidentity.{ActorVirtualIdentity, ChannelIdentity} import edu.uci.ics.amber.engine.common.workflow.PortIdentity import edu.uci.ics.amber.error.ErrorUtils.{mkConsoleMessage, safely} -import edu.uci.ics.texera.workflow.common.EndOfUpstream +import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfUpstream} import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple @@ -143,7 +135,7 @@ class DataProcessor( outputTuple match { case FinalizeExecutor() => - outputManager.emitEndOfUpstream() + outputManager.emitMarker(EndOfUpstream()) // Send Completed signal to worker actor. executor.close() adaptiveBatchingMonitor.stopAdaptiveBatching() @@ -184,6 +176,7 @@ class DataProcessor( dataPayload: DataPayload ): Unit = { val dataProcessingStartTime = System.nanoTime() + val portId = this.inputGateway.getChannel(channelId).getPortId dataPayload match { case DataFrame(tuples) => stateManager.conditionalTransitTo( @@ -200,12 +193,21 @@ class DataProcessor( processInputTuple(inputManager.getNextTuple) case MarkerFrame(marker) => marker match { + case StartOfUpstream() => + this.inputManager.getPort(portId).channels(channelId) = true + if (inputManager.isPortCompleted(portId)) { + inputManager.initBatch(channelId, Array.empty) + processEndOfUpstream() + outputManager.outputIterator.appendSpecialTupleToEnd( + FinalizePort(portId, input = true) + ) + } + if (inputManager.getAllPorts.forall(portId => inputManager.isPortCompleted(portId))) { + // assuming all the output ports finalize after all input ports are finalized. + outputManager.finalizeOutput() + } case EndOfUpstream() => - val channel = this.inputGateway.getChannel(channelId) - val portId = channel.getPortId - this.inputManager.getPort(portId).channels(channelId) = true - if (inputManager.isPortCompleted(portId)) { inputManager.initBatch(channelId, Array.empty) processEndOfUpstream() diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala index 9a5298040ee..aa637e70f1f 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala @@ -11,7 +11,7 @@ import edu.uci.ics.amber.engine.common.rpc.AsyncRPCServer.ControlCommand import edu.uci.ics.amber.engine.common.virtualidentity.ChannelIdentity import edu.uci.ics.amber.engine.common.virtualidentity.util.SOURCE_STARTER_ACTOR import edu.uci.ics.amber.engine.common.workflow.PortIdentity -import edu.uci.ics.texera.workflow.common.EndOfUpstream +import edu.uci.ics.texera.workflow.common.StartOfUpstream object StartHandler { final case class StartWorker() extends ControlCommand[WorkerState] @@ -33,7 +33,7 @@ trait StartHandler { .setPortId(dummyInputPortId) dp.processDataPayload( ChannelIdentity(SOURCE_STARTER_ACTOR, dp.actorId, isControl = false), - MarkerFrame(EndOfUpstream()) + MarkerFrame(StartOfUpstream()) ) dp.stateManager.getCurrentState } else { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index e946ed7a0b2..fd0a03042a3 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -4,6 +4,10 @@ import edu.uci.ics.amber.engine.common.tuple.amber.SpecialTupleLike sealed trait Marker extends SpecialTupleLike +final case class StartOfUpstream() extends Marker { + override def getFields: Array[Any] = Array("StartOfUpstream") +} + final case class EndOfUpstream() extends Marker { override def getFields: Array[Any] = Array("EndOfUpstream") } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala index 016cb9a23f6..6a224050981 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala @@ -1,20 +1,10 @@ package edu.uci.ics.texera.workflow.common.operators import com.fasterxml.jackson.annotation.JsonSubTypes.Type -import com.fasterxml.jackson.annotation.{ - JsonIgnore, - JsonProperty, - JsonSubTypes, - JsonTypeInfo, - JsonPropertyDescription -} +import com.fasterxml.jackson.annotation.{JsonIgnore, JsonProperty, JsonPropertyDescription, JsonSubTypes, JsonTypeInfo} import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp -import edu.uci.ics.amber.engine.common.virtualidentity.{ - ExecutionIdentity, - OperatorIdentity, - WorkflowIdentity -} +import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, OperatorIdentity, WorkflowIdentity} import edu.uci.ics.amber.engine.common.workflow.PortIdentity import edu.uci.ics.texera.web.OPversion import edu.uci.ics.texera.workflow.common.metadata.{OperatorInfo, PropertyNameConstants} @@ -43,10 +33,7 @@ import edu.uci.ics.texera.workflow.operators.sink.managed.ProgressiveSinkOpDesc import edu.uci.ics.texera.workflow.operators.sort.SortOpDesc import edu.uci.ics.texera.workflow.operators.sortPartitions.SortPartitionsOpDesc import edu.uci.ics.texera.workflow.operators.source.apis.reddit.RedditSearchSourceOpDesc -import edu.uci.ics.texera.workflow.operators.source.apis.twitter.v2.{ - TwitterFullArchiveSearchSourceOpDesc, - TwitterSearchSourceOpDesc -} +import edu.uci.ics.texera.workflow.operators.source.apis.twitter.v2.{TwitterFullArchiveSearchSourceOpDesc, TwitterSearchSourceOpDesc} import edu.uci.ics.texera.workflow.operators.source.fetcher.URLFetcherOpDesc import edu.uci.ics.texera.workflow.operators.source.scan.FileScanSourceOpDesc import edu.uci.ics.texera.workflow.operators.source.scan.csv.CSVScanSourceOpDesc @@ -58,15 +45,11 @@ import edu.uci.ics.texera.workflow.operators.source.sql.mysql.MySQLSourceOpDesc import edu.uci.ics.texera.workflow.operators.source.sql.postgresql.PostgreSQLSourceOpDesc import edu.uci.ics.texera.workflow.operators.split.SplitOpDesc import edu.uci.ics.texera.workflow.operators.symmetricDifference.SymmetricDifferenceOpDesc +import edu.uci.ics.texera.workflow.operators.test.{TestAOpDesc, TestBOpDesc} import edu.uci.ics.texera.workflow.operators.typecasting.TypeCastingOpDesc import edu.uci.ics.texera.workflow.operators.udf.java.JavaUDFOpDesc import edu.uci.ics.texera.workflow.operators.udf.python.source.PythonUDFSourceOpDescV2 -import edu.uci.ics.texera.workflow.operators.udf.python.{ - DualInputPortsPythonUDFOpDescV2, - PythonLambdaFunctionOpDesc, - PythonTableReducerOpDesc, - PythonUDFOpDescV2 -} +import edu.uci.ics.texera.workflow.operators.udf.python.{DualInputPortsPythonUDFOpDescV2, PythonLambdaFunctionOpDesc, PythonTableReducerOpDesc, PythonUDFOpDescV2} import edu.uci.ics.texera.workflow.operators.union.UnionOpDesc import edu.uci.ics.texera.workflow.operators.unneststring.UnnestStringOpDesc import edu.uci.ics.texera.workflow.operators.visualization.boxPlot.BoxPlotOpDesc @@ -146,6 +129,8 @@ trait StateTransferFunc new Type(value = classOf[AsterixDBSourceOpDesc], name = "AsterixDBSource"), new Type(value = classOf[TypeCastingOpDesc], name = "TypeCasting"), new Type(value = classOf[LimitOpDesc], name = "Limit"), + new Type(value = classOf[TestAOpDesc], name = "TestA"), + new Type(value = classOf[TestBOpDesc], name = "TestB"), new Type(value = classOf[RandomKSamplingOpDesc], name = "RandomKSampling"), new Type(value = classOf[ReservoirSamplingOpDesc], name = "ReservoirSampling"), new Type(value = classOf[HashJoinOpDesc[String]], name = "HashJoin"), diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala index 26783039ef8..a606096799b 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala @@ -27,6 +27,8 @@ trait OperatorExecutor { } def onFinish(port: Int): Iterator[TupleLike] = Iterator.empty - def onOutputFinish(port: Int): Iterator[State] = Iterator.empty + def onInputStart(port: Int): Iterator[State] = Iterator.empty + + def onOutputFinish(port: Int): State = State("place holder") } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpDesc.scala new file mode 100644 index 00000000000..902124524c7 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpDesc.scala @@ -0,0 +1,66 @@ +package edu.uci.ics.texera.workflow.operators.test + +import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription} +import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle +import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp +import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo +import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} +import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort} +import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} +import edu.uci.ics.texera.workflow.common.operators.{LogicalOp, StateTransferFunc} +import edu.uci.ics.texera.workflow.common.tuple.schema.Schema + +import scala.util.{Success, Try} + +class TestAOpDesc extends LogicalOp { + + @JsonProperty(required = true) + @JsonSchemaTitle("Limit") + @JsonPropertyDescription("the max number of output rows") + var limit: Int = _ + + override def getPhysicalOp( + workflowId: WorkflowIdentity, + executionId: ExecutionIdentity + ): PhysicalOp = { + PhysicalOp + .oneToOnePhysicalOp( + workflowId, + executionId, + operatorIdentifier, + OpExecInitInfo((_, _) => { + new TestAOpExec(limit) + }) + ) + .withInputPorts(operatorInfo.inputPorts) + .withOutputPorts(operatorInfo.outputPorts) + .withParallelizable(false) + } + + override def operatorInfo: OperatorInfo = + OperatorInfo( + "TestA", + "Limit the number of output rows", + OperatorGroupConstants.CLEANING_GROUP, + inputPorts = List(InputPort()), + outputPorts = List(OutputPort()), + supportReconfiguration = true + ) + + override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(0) + + override def runtimeReconfiguration( + workflowId: WorkflowIdentity, + executionId: ExecutionIdentity, + oldLogicalOp: LogicalOp, + newLogicalOp: LogicalOp + ): Try[(PhysicalOp, Option[StateTransferFunc])] = { + val newPhysicalOp = newLogicalOp.getPhysicalOp(workflowId, executionId) + val stateTransferFunc: StateTransferFunc = (oldOp, newOp) => { + val oldLimitOp = oldOp.asInstanceOf[TestAOpExec] + val newLimitOp = newOp.asInstanceOf[TestAOpExec] + newLimitOp.count = oldLimitOp.count + } + Success(newPhysicalOp, Some(stateTransferFunc)) + } +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala new file mode 100644 index 00000000000..eea6cd89eb1 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala @@ -0,0 +1,24 @@ +package edu.uci.ics.texera.workflow.operators.test + +import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike +import edu.uci.ics.texera.workflow.common.State +import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor +import edu.uci.ics.texera.workflow.common.tuple.Tuple + +class TestAOpExec(limit: Int) extends OperatorExecutor { + var count = 0 + + override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { + if (count < limit) { + count += 1 + Iterator(tuple) + } else { + Iterator() + } + } + + override def onOutputFinish(port: Int): State = { + State("finished") + } + +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpDesc.scala new file mode 100644 index 00000000000..23a506adca2 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpDesc.scala @@ -0,0 +1,66 @@ +package edu.uci.ics.texera.workflow.operators.test + +import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription} +import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle +import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp +import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo +import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} +import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort} +import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} +import edu.uci.ics.texera.workflow.common.operators.{LogicalOp, StateTransferFunc} +import edu.uci.ics.texera.workflow.common.tuple.schema.Schema + +import scala.util.{Success, Try} + +class TestBOpDesc extends LogicalOp { + + @JsonProperty(required = true) + @JsonSchemaTitle("Limit") + @JsonPropertyDescription("the max number of output rows") + var limit: Int = _ + + override def getPhysicalOp( + workflowId: WorkflowIdentity, + executionId: ExecutionIdentity + ): PhysicalOp = { + PhysicalOp + .oneToOnePhysicalOp( + workflowId, + executionId, + operatorIdentifier, + OpExecInitInfo((_, _) => { + new TestAOpExec(limit) + }) + ) + .withInputPorts(operatorInfo.inputPorts) + .withOutputPorts(operatorInfo.outputPorts) + .withParallelizable(false) + } + + override def operatorInfo: OperatorInfo = + OperatorInfo( + "TestB", + "Limit the number of output rows", + OperatorGroupConstants.CLEANING_GROUP, + inputPorts = List(InputPort()), + outputPorts = List(OutputPort()), + supportReconfiguration = true + ) + + override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(0) + + override def runtimeReconfiguration( + workflowId: WorkflowIdentity, + executionId: ExecutionIdentity, + oldLogicalOp: LogicalOp, + newLogicalOp: LogicalOp + ): Try[(PhysicalOp, Option[StateTransferFunc])] = { + val newPhysicalOp = newLogicalOp.getPhysicalOp(workflowId, executionId) + val stateTransferFunc: StateTransferFunc = (oldOp, newOp) => { + val oldLimitOp = oldOp.asInstanceOf[TestBOpExec] + val newLimitOp = newOp.asInstanceOf[TestBOpExec] + newLimitOp.count = oldLimitOp.count + } + Success(newPhysicalOp, Some(stateTransferFunc)) + } +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala new file mode 100644 index 00000000000..d98d20b9877 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala @@ -0,0 +1,21 @@ +package edu.uci.ics.texera.workflow.operators.test + +import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike +import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor +import edu.uci.ics.texera.workflow.common.tuple.Tuple + +class TestBOpExec(limit: Int) extends OperatorExecutor { + var count = 0 + + override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { + + if (count < limit) { + count += 1 + Iterator(tuple) + } else { + Iterator() + } + + } + +} diff --git a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManagerSpec.scala b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManagerSpec.scala index 05963bf97cf..feb3f1abf5d 100644 --- a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManagerSpec.scala +++ b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManagerSpec.scala @@ -80,7 +80,7 @@ class OutputManagerSpec extends AnyFlatSpec with MockFactory { tuples.foreach { t => outputManager.passTupleToDownstream(TupleLike(t.getFields), None) } - outputManager.emitEndOfUpstream() + outputManager.emitMarker(EndOfUpstream()) } } From 58d6afdb92f8f4ed26ee30e45c56447a1a159c1b Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Tue, 16 Apr 2024 13:49:41 -0700 Subject: [PATCH 012/163] update --- .../architecture/worker/DataProcessor.scala | 43 +++++++++++++++---- .../engine/common/SinkOperatorExecutor.scala | 2 +- .../common/SourceOperatorExecutor.scala | 2 +- .../ics/texera/workflow/common/Marker.scala | 32 ++++++++------ .../common/operators/OperatorExecutor.scala | 6 +-- .../workflow/operators/test/TestAOpExec.scala | 7 +-- .../workflow/operators/test/TestBOpDesc.scala | 2 +- .../workflow/operators/test/TestBOpExec.scala | 10 ++++- 8 files changed, 72 insertions(+), 32 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index cbc72892767..d100563055e 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -21,7 +21,7 @@ import edu.uci.ics.amber.engine.common.virtualidentity.util.{CONTROLLER, SELF} import edu.uci.ics.amber.engine.common.virtualidentity.{ActorVirtualIdentity, ChannelIdentity} import edu.uci.ics.amber.engine.common.workflow.PortIdentity import edu.uci.ics.amber.error.ErrorUtils.{mkConsoleMessage, safely} -import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfUpstream} +import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfUpstream, State} import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple @@ -92,14 +92,31 @@ class DataProcessor( } } + private[this] def processInputState(state: State): Unit = { + try { + val portIdentity: PortIdentity = + this.inputGateway.getChannel(inputManager.currentChannelId).getPortId + outputManager.emitMarker( + executor.processState( + state, + portIdentity.id + ) + ) + } catch safely { + case e => + // forward input tuple to the user and pause DP thread + handleExecutorException(e) + } + } + /** * process end of an input port with Executor.onFinish(). * this function is only called by the DP thread. */ - private[this] def processEndOfUpstream(): Unit = { + private[this] def processStartOfUpstream(): Unit = { try { outputManager.outputIterator.setTupleOutput( - executor.onFinishMultiPort( + executor.onInputFinishMultiPort( this.inputGateway.getChannel(inputManager.currentChannelId).getPortId.id ) ) @@ -110,6 +127,16 @@ class DataProcessor( } } + private[this] def processEndOfUpstream(): Unit = { + try { + outputManager.emitMarker(executor.onOutputFinish()) + } catch safely { + case e => + // forward input tuple to the user and pause DP thread + handleExecutorException(e) + } + } + /** transfer one tuple from iterator to downstream. * this function is only called by the DP thread */ @@ -192,15 +219,15 @@ class DataProcessor( inputManager.initBatch(channelId, tuples) processInputTuple(inputManager.getNextTuple) case MarkerFrame(marker) => + logger.error(s"unsupported marker type: $marker") marker match { + case state: State => + processInputState(state) case StartOfUpstream() => this.inputManager.getPort(portId).channels(channelId) = true if (inputManager.isPortCompleted(portId)) { inputManager.initBatch(channelId, Array.empty) - processEndOfUpstream() - outputManager.outputIterator.appendSpecialTupleToEnd( - FinalizePort(portId, input = true) - ) + processStartOfUpstream() } if (inputManager.getAllPorts.forall(portId => inputManager.isPortCompleted(portId))) { // assuming all the output ports finalize after all input ports are finalized. @@ -209,7 +236,6 @@ class DataProcessor( case EndOfUpstream() => this.inputManager.getPort(portId).channels(channelId) = true if (inputManager.isPortCompleted(portId)) { - inputManager.initBatch(channelId, Array.empty) processEndOfUpstream() outputManager.outputIterator.appendSpecialTupleToEnd( FinalizePort(portId, input = true) @@ -273,5 +299,4 @@ class DataProcessor( // invoke a pause in-place asyncRPCServer.execute(PauseWorker(), SELF) } - } diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SinkOperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SinkOperatorExecutor.scala index ac63e1f8409..052b90a0816 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SinkOperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SinkOperatorExecutor.scala @@ -15,7 +15,7 @@ trait SinkOperatorExecutor extends OperatorExecutor { Iterator.empty } - override def onFinishMultiPort(port: Int): Iterator[(TupleLike, Option[PortIdentity])] = + override def onInputFinishMultiPort(port: Int): Iterator[(TupleLike, Option[PortIdentity])] = Iterator.empty def consumeTuple(tuple: Tuple, input: Int): Unit diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SourceOperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SourceOperatorExecutor.scala index 4ba743e8136..2a956a85e43 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SourceOperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SourceOperatorExecutor.scala @@ -21,7 +21,7 @@ trait SourceOperatorExecutor extends OperatorExecutor { def produceTuple(): Iterator[TupleLike] - override def onFinishMultiPort(port: Int): Iterator[(TupleLike, Option[PortIdentity])] = { + override def onInputFinishMultiPort(port: Int): Iterator[(TupleLike, Option[PortIdentity])] = { // We assume there is only one input port for source operators. The current assumption // makes produceTuple to be invoked on each input port finish. // We should move this to onFinishAllPorts later. diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index fd0a03042a3..79790d4194e 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -1,22 +1,30 @@ package edu.uci.ics.texera.workflow.common +import edu.uci.ics.amber.engine.common.SerializedState import edu.uci.ics.amber.engine.common.tuple.amber.SpecialTupleLike -sealed trait Marker extends SpecialTupleLike +import scala.collection.mutable -final case class StartOfUpstream() extends Marker { - override def getFields: Array[Any] = Array("StartOfUpstream") -} +sealed trait Marker -final case class EndOfUpstream() extends Marker { - override def getFields: Array[Any] = Array("EndOfUpstream") -} +final case class StartOfUpstream() extends Marker -final case class EndOfIteration() extends Marker { - override def getFields: Array[Any] = Array("EndOfIteration") -} +final case class EndOfUpstream() extends Marker + + +final case class State() extends Marker { + val list: mutable.Map[String, String] = mutable.HashMap() + + def add(key: String, value: Object): Unit = { + list.put(key, SerializedState.fromObjectToString(value)) + } -final case class State(value: String) extends Marker { - override def getFields: Array[Any] = Array("State") + def get(key: String): Object = { + //SerializedState.stringToObject(list.getOrElse(key, "")) + list.get(key) match { + case Some(value) => SerializedState.stringToObject(value) + case None => null + } + } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala index a606096799b..412445a1836 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala @@ -20,15 +20,15 @@ trait OperatorExecutor { def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] - def processMarker(marker: State, port: Int): Iterator[State] = Iterator.empty + def processState(state: State, port: Int): State = State() - def onFinishMultiPort(port: Int): Iterator[(TupleLike, Option[PortIdentity])] = { + def onInputFinishMultiPort(port: Int): Iterator[(TupleLike, Option[PortIdentity])] = { onFinish(port).map(t => (t, None)) } def onFinish(port: Int): Iterator[TupleLike] = Iterator.empty def onInputStart(port: Int): Iterator[State] = Iterator.empty - def onOutputFinish(port: Int): State = State("place holder") + def onOutputFinish(): State = State() } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala index eea6cd89eb1..55fca80e428 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala @@ -17,8 +17,9 @@ class TestAOpExec(limit: Int) extends OperatorExecutor { } } - override def onOutputFinish(port: Int): State = { - State("finished") + override def onOutputFinish(): State = { + val state = State() + state.add("count", "test") + state } - } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpDesc.scala index 23a506adca2..a61abaec7a4 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpDesc.scala @@ -29,7 +29,7 @@ class TestBOpDesc extends LogicalOp { executionId, operatorIdentifier, OpExecInitInfo((_, _) => { - new TestAOpExec(limit) + new TestBOpExec(limit) }) ) .withInputPorts(operatorInfo.inputPorts) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala index d98d20b9877..bbb3eba7cec 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala @@ -1,21 +1,27 @@ package edu.uci.ics.texera.workflow.operators.test import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike +import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple class TestBOpExec(limit: Int) extends OperatorExecutor { var count = 0 - override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { + var s = "" + override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { if (count < limit) { count += 1 Iterator(tuple) } else { Iterator() } - } + override def processState(state: State, port: Int): State = { + val objRepr = state.get("count") + println(objRepr) + state + } } From 49a321de830e0cb9df2071ba74b53ec1a3761720 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Sun, 12 May 2024 01:35:52 -0700 Subject: [PATCH 013/163] update --- .../architecture/worker/promisehandlers/StartHandler.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala index aa637e70f1f..8c1703479db 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala @@ -11,7 +11,7 @@ import edu.uci.ics.amber.engine.common.rpc.AsyncRPCServer.ControlCommand import edu.uci.ics.amber.engine.common.virtualidentity.ChannelIdentity import edu.uci.ics.amber.engine.common.virtualidentity.util.SOURCE_STARTER_ACTOR import edu.uci.ics.amber.engine.common.workflow.PortIdentity -import edu.uci.ics.texera.workflow.common.StartOfUpstream +import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfUpstream} object StartHandler { final case class StartWorker() extends ControlCommand[WorkerState] @@ -33,7 +33,7 @@ trait StartHandler { .setPortId(dummyInputPortId) dp.processDataPayload( ChannelIdentity(SOURCE_STARTER_ACTOR, dp.actorId, isControl = false), - MarkerFrame(StartOfUpstream()) + MarkerFrame(EndOfUpstream()) ) dp.stateManager.getCurrentState } else { From 50203776b87890f407b33cb9df2aa6a4b8dc51b6 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Mon, 13 May 2024 03:52:18 +0200 Subject: [PATCH 014/163] update --- .../messaginglayer/OutputManager.scala | 11 ------ .../architecture/worker/DataProcessor.scala | 39 +++++++------------ .../common/operators/OperatorExecutor.scala | 2 +- .../workflow/operators/test/TestAOpExec.scala | 2 +- .../workflow/operators/test/TestBOpExec.scala | 2 + 5 files changed, 19 insertions(+), 37 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManager.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManager.scala index b6794dd0eae..31de2c1b81a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManager.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManager.scala @@ -158,17 +158,6 @@ class OutputManager( buffersToFlush.foreach(_.flush()) } - /** - * Send the last batch and EOU marker to all down streams - */ - def emitEndOfUpstream(): Unit = { - // flush all network buffers of this operator, emit end marker to network - networkOutputBuffers.foreach(kv => { - kv._2.flush() - kv._2.noMore() - }) - } - def emitMarker(marker: Marker): Unit = { networkOutputBuffers.foreach(kv => { kv._2.flush() diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index d100563055e..71cd5398eef 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -113,23 +113,17 @@ class DataProcessor( * process end of an input port with Executor.onFinish(). * this function is only called by the DP thread. */ - private[this] def processStartOfUpstream(): Unit = { + private[this] def processEndOfUpstream(): Unit = { try { outputManager.outputIterator.setTupleOutput( executor.onInputFinishMultiPort( this.inputGateway.getChannel(inputManager.currentChannelId).getPortId.id ) ) - } catch safely { - case e => - // forward input tuple to the user and pause DP thread - handleExecutorException(e) - } - } - - private[this] def processEndOfUpstream(): Unit = { - try { - outputManager.emitMarker(executor.onOutputFinish()) + val outputState = executor.produceState() + if (outputState!= null) { + outputManager.emitMarker(outputState) + } } catch safely { case e => // forward input tuple to the user and pause DP thread @@ -219,27 +213,24 @@ class DataProcessor( inputManager.initBatch(channelId, tuples) processInputTuple(inputManager.getNextTuple) case MarkerFrame(marker) => - logger.error(s"unsupported marker type: $marker") + marker match { case state: State => + val a = state.get("count") + logger.error(s"state marker: $marker") + logger.error(s"state marker: $a") processInputState(state) case StartOfUpstream() => - this.inputManager.getPort(portId).channels(channelId) = true - if (inputManager.isPortCompleted(portId)) { - inputManager.initBatch(channelId, Array.empty) - processStartOfUpstream() - } - if (inputManager.getAllPorts.forall(portId => inputManager.isPortCompleted(portId))) { - // assuming all the output ports finalize after all input ports are finalized. - outputManager.finalizeOutput() - } case EndOfUpstream() => + val channel = this.inputGateway.getChannel(channelId) + val portId = channel.getPortId + this.inputManager.getPort(portId).channels(channelId) = true + if (inputManager.isPortCompleted(portId)) { + inputManager.initBatch(channelId, Array.empty) processEndOfUpstream() - outputManager.outputIterator.appendSpecialTupleToEnd( - FinalizePort(portId, input = true) - ) + outputManager.outputIterator.appendSpecialTupleToEnd(FinalizePort(portId, input = true)) } if (inputManager.getAllPorts.forall(portId => inputManager.isPortCompleted(portId))) { // assuming all the output ports finalize after all input ports are finalized. diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala index 412445a1836..c0496042a47 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala @@ -29,6 +29,6 @@ trait OperatorExecutor { def onInputStart(port: Int): Iterator[State] = Iterator.empty - def onOutputFinish(): State = State() + def produceState(): State = null } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala index 55fca80e428..63313b0ee10 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala @@ -17,7 +17,7 @@ class TestAOpExec(limit: Int) extends OperatorExecutor { } } - override def onOutputFinish(): State = { + override def produceState(): State = { val state = State() state.add("count", "test") state diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala index bbb3eba7cec..58c70cd2c9a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala @@ -19,6 +19,8 @@ class TestBOpExec(limit: Int) extends OperatorExecutor { } } + + override def processState(state: State, port: Int): State = { val objRepr = state.get("count") println(objRepr) From 02f420f52c6153e43661748f30512680260941e2 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Sun, 19 May 2024 19:55:53 +0200 Subject: [PATCH 015/163] update --- .../pythonworker/PythonProxyClient.scala | 2 +- .../architecture/worker/DataProcessor.scala | 35 ++++--------------- .../amber/engine/common/SerializedState.scala | 4 +-- .../edu/uci/ics/texera/web/JsonTest.scala | 7 ++-- .../common/operators/OperatorExecutor.scala | 6 ++-- .../hashJoin/HashJoinBuildOpExec.scala | 16 +++++---- .../operators/hashJoin/HashJoinOpDesc.scala | 30 +++------------- .../hashJoin/HashJoinProbeOpExec.scala | 17 +++++---- .../workflow/operators/test/TestBOpExec.scala | 3 +- 9 files changed, 37 insertions(+), 83 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala index ae1e9f305ff..773bd9b548b 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala @@ -104,7 +104,7 @@ class PythonProxyClient(portNumberPromise: Promise[Int], val actorId: ActorVirtu dataPayload match { case DataFrame(frame) => val tuples: mutable.Queue[Tuple] = - mutable.Queue(frame.map(_.asInstanceOf[Tuple]).toSeq: _*) + mutable.Queue(frame.map(_).toSeq: _*) writeArrowStream(tuples, from, isEnd = false) case MarkerFrame(frame) => frame match { diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index b9457f13da8..0e55dfdaadc 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -92,33 +92,18 @@ class DataProcessor( } } - private[this] def processInputState(state: State): Unit = { - try { - val portIdentity: PortIdentity = - this.inputGateway.getChannel(inputManager.currentChannelId).getPortId - outputManager.emitMarker( - executor.processState( - state, - portIdentity.id - ) - ) - } catch safely { - case e => - // forward input tuple to the user and pause DP thread - handleExecutorException(e) - } + private[this] def processInputState(state: State, port: Int): Unit = { + executor.processState(state, port) } /** * process end of an input port with Executor.onFinish(). * this function is only called by the DP thread. */ - private[this] def processEndOfUpstream(): Unit = { + private[this] def processEndOfUpstream(port: Int): Unit = { try { outputManager.outputIterator.setTupleOutput( - executor.onInputFinishMultiPort( - this.inputGateway.getChannel(inputManager.currentChannelId).getPortId.id - ) + executor.onInputFinishMultiPort(port) ) val outputState = executor.produceState() if (outputState!= null) { @@ -213,23 +198,15 @@ class DataProcessor( inputManager.initBatch(channelId, tuples) processInputTuple(inputManager.getNextTuple) case MarkerFrame(marker) => - marker match { case state: State => - val a = state.get("count") - logger.error(s"state marker: $marker") - logger.error(s"state marker: $a") - processInputState(state) + processInputState(state, portId.id) case StartOfUpstream() => case EndOfUpstream() => - val channel = this.inputGateway.getChannel(channelId) - val portId = channel.getPortId - this.inputManager.getPort(portId).channels(channelId) = true - if (inputManager.isPortCompleted(portId)) { inputManager.initBatch(channelId, Array.empty) - processEndOfUpstream() + processEndOfUpstream(portId.id) outputManager.outputIterator.appendSpecialTupleToEnd(FinalizePort(portId, input = true)) } if (inputManager.getAllPorts.forall(portId => inputManager.isPortCompleted(portId))) { diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SerializedState.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SerializedState.scala index 44faf54c304..870eb3f23f5 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SerializedState.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SerializedState.scala @@ -18,13 +18,13 @@ object SerializedState { } def fromObjectToString[T <: AnyRef](obj:T):String = { - val serializedObj = fromObject(obj, AmberUtils.serde) + val serializedObj = fromObject(obj, AmberRuntime.serde) s"${serializedObj.manifest}amber_serialization${serializedObj.serializerId}amber_serialization" + (serializedObj.bytes.map(_.toChar)).mkString } def stringToObject(str:String):AnyRef = { val fields = str.split("amber_serialization") - SerializedState(fields(2).map(_.toByte).toArray,fields(1).toInt,fields(0)).toObject[AnyRef](AmberUtils.serde) + SerializedState(fields(2).map(_.toByte).toArray,fields(1).toInt,fields(0)).toObject[AnyRef](AmberRuntime.serde) } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/JsonTest.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/JsonTest.scala index 89d0de14178..e9b196b1caf 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/JsonTest.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/JsonTest.scala @@ -1,16 +1,13 @@ package edu.uci.ics.texera.web -import edu.uci.ics.amber.engine.common.{AmberUtils, SerializedState} -import edu.uci.ics.texera.Utils -import edu.uci.ics.texera.web.workflowruntimestate.WorkflowAggregatedState -import edu.uci.ics.texera.web.workflowruntimestate.WorkflowAggregatedState.RUNNING +import edu.uci.ics.amber.engine.common.{AmberRuntime, SerializedState} import scala.collection.mutable object JsonTest { def main(args: Array[String]): Unit = { - AmberUtils.startActorWorker(None) + AmberRuntime.startActorWorker(None) val testObjs = Array( Map(1 -> "123", 3 -> "1231234"), mutable.HashMap[String, Any]("name" -> "peter", "mail" -> "peter@uci.edu", "grade" -> 4.0), diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala index c0496042a47..769eed2dc15 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala @@ -19,9 +19,7 @@ trait OperatorExecutor { } def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] - - def processState(state: State, port: Int): State = State() - + def onInputFinishMultiPort(port: Int): Iterator[(TupleLike, Option[PortIdentity])] = { onFinish(port).map(t => (t, None)) } @@ -31,4 +29,6 @@ trait OperatorExecutor { def produceState(): State = null + def processState(state: State, port: Int): Unit = {} + } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala index 3c8d2bda78d..9c8bf577c7f 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala @@ -1,6 +1,7 @@ package edu.uci.ics.texera.workflow.operators.hashJoin import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike +import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple @@ -9,23 +10,24 @@ import scala.collection.mutable.ListBuffer class HashJoinBuildOpExec[K](buildAttributeName: String) extends OperatorExecutor { - var buildTableHashMap: mutable.HashMap[K, ListBuffer[Tuple]] = _ + var buildTableHashMap: mutable.HashMap[K, (ListBuffer[Tuple], Boolean)] = _ override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { val key = tuple.getField(buildAttributeName).asInstanceOf[K] - buildTableHashMap.getOrElseUpdate(key, new ListBuffer[Tuple]()) += tuple + buildTableHashMap.getOrElseUpdate(key, (new ListBuffer[Tuple](), false))._1 += tuple Iterator() } - override def onFinish(port: Int): Iterator[TupleLike] = { - buildTableHashMap.iterator.flatMap { - case (k, v) => v.map(t => TupleLike(List(k) ++ t.getFields)) - } + + override def produceState(): State = { + val state = State() + state.add("hashtable", buildTableHashMap) + state } override def open(): Unit = { - buildTableHashMap = new mutable.HashMap[K, mutable.ListBuffer[Tuple]]() + buildTableHashMap = new mutable.HashMap[K, (mutable.ListBuffer[Tuple], Boolean)]() } override def close(): Unit = { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala index cf7aeeedf7d..a52444fd77b 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala @@ -1,29 +1,17 @@ package edu.uci.ics.texera.workflow.operators.hashJoin -import edu.uci.ics.texera.workflow.operators.hashJoin.HashJoinOpDesc.HASH_JOIN_INTERNAL_KEY_NAME import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription} import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle} -import edu.uci.ics.amber.engine.architecture.deploysemantics.{PhysicalOp, SchemaPropagationFunc} import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo -import edu.uci.ics.amber.engine.common.virtualidentity.{ - ExecutionIdentity, - PhysicalOpIdentity, - WorkflowIdentity -} +import edu.uci.ics.amber.engine.architecture.deploysemantics.{PhysicalOp, SchemaPropagationFunc} +import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, PhysicalOpIdentity, WorkflowIdentity} import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort, PhysicalLink, PortIdentity} -import edu.uci.ics.texera.workflow.common.metadata.annotations.{ - AutofillAttributeName, - AutofillAttributeNameOnPort1 -} +import edu.uci.ics.texera.workflow.common.metadata.annotations.{AutofillAttributeName, AutofillAttributeNameOnPort1} import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} import edu.uci.ics.texera.workflow.common.operators.LogicalOp -import edu.uci.ics.texera.workflow.common.tuple.schema.{Attribute, AttributeType, Schema} +import edu.uci.ics.texera.workflow.common.tuple.schema.{Attribute, Schema} import edu.uci.ics.texera.workflow.common.workflow.{HashPartition, PhysicalPlan} -object HashJoinOpDesc { - val HASH_JOIN_INTERNAL_KEY_NAME = "__internal__hashtable__key__" -} - @JsonSchemaInject(json = """ { "attributeTypeRules": { @@ -78,13 +66,11 @@ class HashJoinOpDesc[K] extends LogicalOp { Map( PortIdentity(internal = true) -> Schema .builder() - .add(HASH_JOIN_INTERNAL_KEY_NAME, AttributeType.ANY) .add(inputSchemas(operatorInfo.inputPorts.head.id)) .build() ) ) ) - .withDerivePartition(_ => HashPartition(List(HASH_JOIN_INTERNAL_KEY_NAME))) .withParallelizable(true) val probeBuildInputPort = InputPort(PortIdentity(0, internal = true)) @@ -112,12 +98,7 @@ class HashJoinOpDesc[K] extends LogicalOp { ) ) .withOutputPorts(List(probeOutputPort)) - .withPartitionRequirement( - List( - Option(HashPartition(List(HASH_JOIN_INTERNAL_KEY_NAME))), - Option(HashPartition(List(probeAttributeName))) - ) - ) + .withPartitionRequirement(List(Option(HashPartition(List(probeAttributeName))))) .withDerivePartition(_ => HashPartition(List(probeAttributeName))) .withParallelizable(true) .withPropagateSchema( @@ -161,7 +142,6 @@ class HashJoinOpDesc[K] extends LogicalOp { val probeSchema = schemas(1) val builder = Schema.builder() builder.add(buildSchema) - builder.removeIfExists(HASH_JOIN_INTERNAL_KEY_NAME) val leftAttributeNames = buildSchema.getAttributeNames val rightAttributeNames = probeSchema.getAttributeNames.filterNot(name => name == probeAttributeName) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala index c6a5e0bef85..37fdbde12e0 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala @@ -1,10 +1,9 @@ package edu.uci.ics.texera.workflow.operators.hashJoin import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike +import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple -import edu.uci.ics.texera.workflow.operators.hashJoin.HashJoinOpDesc.HASH_JOIN_INTERNAL_KEY_NAME - import scala.collection.mutable import scala.collection.mutable.ListBuffer @@ -44,20 +43,20 @@ class HashJoinProbeOpExec[K]( probeAttributeName: String, joinType: JoinType ) extends OperatorExecutor { - var currentTuple: Tuple = _ var buildTableHashMap: mutable.HashMap[K, (ListBuffer[Tuple], Boolean)] = _ + override def processState(state: State, port: Int): Unit = { + buildTableHashMap = state.get("hashtable").asInstanceOf[mutable.HashMap[K, (mutable.ListBuffer[Tuple], Boolean)]] + } + + override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = if (port == 0) { - // Load build hash map - val key = tuple.getField[K](HASH_JOIN_INTERNAL_KEY_NAME) - buildTableHashMap.getOrElseUpdate(key, (new ListBuffer[Tuple](), false))._1 += tuple - .getPartialTuple( - tuple.getSchema.getAttributeNames.filterNot(n => n == HASH_JOIN_INTERNAL_KEY_NAME) - ) Iterator.empty } else { + System.err.println(buildTableHashMap) + System.err.println(tuple) // Probe phase val key = tuple.getField(probeAttributeName).asInstanceOf[K] val (matchedTuples, joined) = diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala index 58c70cd2c9a..625e5b757a5 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala @@ -21,9 +21,8 @@ class TestBOpExec(limit: Int) extends OperatorExecutor { - override def processState(state: State, port: Int): State = { + override def processState(state: State, port: Int): Unit = { val objRepr = state.get("count") println(objRepr) - state } } From e50ad09147ab67f1e9fedffb0a39e3abb6964249 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Sun, 19 May 2024 19:57:58 +0200 Subject: [PATCH 016/163] update --- .../engine/architecture/pythonworker/PythonProxyClient.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala index 773bd9b548b..ae1e9f305ff 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala @@ -104,7 +104,7 @@ class PythonProxyClient(portNumberPromise: Promise[Int], val actorId: ActorVirtu dataPayload match { case DataFrame(frame) => val tuples: mutable.Queue[Tuple] = - mutable.Queue(frame.map(_).toSeq: _*) + mutable.Queue(frame.map(_.asInstanceOf[Tuple]).toSeq: _*) writeArrowStream(tuples, from, isEnd = false) case MarkerFrame(frame) => frame match { From 6981ea1630e9c30729eff35df2c6408609f6ea61 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Sat, 27 Jul 2024 17:51:25 -0700 Subject: [PATCH 017/163] fix --- .../architecture/worker/promisehandlers/StartHandler.scala | 2 +- .../main/scala/edu/uci/ics/texera/workflow/common/Marker.scala | 1 - .../ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala index 8c1703479db..9a5298040ee 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala @@ -11,7 +11,7 @@ import edu.uci.ics.amber.engine.common.rpc.AsyncRPCServer.ControlCommand import edu.uci.ics.amber.engine.common.virtualidentity.ChannelIdentity import edu.uci.ics.amber.engine.common.virtualidentity.util.SOURCE_STARTER_ACTOR import edu.uci.ics.amber.engine.common.workflow.PortIdentity -import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfUpstream} +import edu.uci.ics.texera.workflow.common.EndOfUpstream object StartHandler { final case class StartWorker() extends ControlCommand[WorkerState] diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index 79790d4194e..d8d3ac85fcd 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -1,7 +1,6 @@ package edu.uci.ics.texera.workflow.common import edu.uci.ics.amber.engine.common.SerializedState -import edu.uci.ics.amber.engine.common.tuple.amber.SpecialTupleLike import scala.collection.mutable diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala index ee84c1e2dad..1c8a938b4cd 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala @@ -10,7 +10,7 @@ import edu.uci.ics.texera.workflow.common.metadata.annotations.{AutofillAttribut import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} import edu.uci.ics.texera.workflow.common.operators.LogicalOp import edu.uci.ics.texera.workflow.common.tuple.schema.{Attribute, Schema} -import edu.uci.ics.texera.workflow.common.workflow.{HashPartition, PhysicalPlan} +import edu.uci.ics.texera.workflow.common.workflow.{HashPartition, OneToOnePartition, PhysicalPlan} @JsonSchemaInject(json = """ { From 2c0c8d4ada872920b0751421b6a9bf75279da77f Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Mon, 29 Jul 2024 16:44:53 -0700 Subject: [PATCH 018/163] fix --- .../engine/architecture/pythonworker/PythonProxyClient.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala index ae1e9f305ff..773bd9b548b 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala @@ -104,7 +104,7 @@ class PythonProxyClient(portNumberPromise: Promise[Int], val actorId: ActorVirtu dataPayload match { case DataFrame(frame) => val tuples: mutable.Queue[Tuple] = - mutable.Queue(frame.map(_.asInstanceOf[Tuple]).toSeq: _*) + mutable.Queue(frame.map(_).toSeq: _*) writeArrowStream(tuples, from, isEnd = false) case MarkerFrame(frame) => frame match { From a4e49824eba9e2902383dfaa2ec6e780c31d1012 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 31 Jul 2024 03:24:18 -0700 Subject: [PATCH 019/163] fix fmt --- .../engine/architecture/pythonworker/PythonProxyClient.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala index 773bd9b548b..ae1e9f305ff 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala @@ -104,7 +104,7 @@ class PythonProxyClient(portNumberPromise: Promise[Int], val actorId: ActorVirtu dataPayload match { case DataFrame(frame) => val tuples: mutable.Queue[Tuple] = - mutable.Queue(frame.map(_).toSeq: _*) + mutable.Queue(frame.map(_.asInstanceOf[Tuple]).toSeq: _*) writeArrowStream(tuples, from, isEnd = false) case MarkerFrame(frame) => frame match { From e47dc9d4ebd6b47527a59c592e74499bb02eccb0 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sat, 3 Aug 2024 02:52:07 -0700 Subject: [PATCH 020/163] update --- .../uci/ics/texera/workflow/common/Marker.scala | 17 ++++++----------- .../hashJoin/HashJoinBuildOpExec.scala | 3 ++- .../hashJoin/HashJoinProbeOpExec.scala | 2 -- .../workflow/operators/test/TestAOpExec.scala | 3 ++- 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index d8d3ac85fcd..b4ad512b0f4 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -1,6 +1,6 @@ package edu.uci.ics.texera.workflow.common -import edu.uci.ics.amber.engine.common.SerializedState +import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType import scala.collection.mutable @@ -10,20 +10,15 @@ final case class StartOfUpstream() extends Marker final case class EndOfUpstream() extends Marker - final case class State() extends Marker { - val list: mutable.Map[String, String] = mutable.HashMap() + val list: mutable.Map[String, (AttributeType, Any)] = mutable.HashMap() - def add(key: String, value: Object): Unit = { - list.put(key, SerializedState.fromObjectToString(value)) + def add(attributeName: String, attributeType: AttributeType, field: Any): Unit = { + list.put(attributeName, (attributeType, field)) } - def get(key: String): Object = { - //SerializedState.stringToObject(list.getOrElse(key, "")) - list.get(key) match { - case Some(value) => SerializedState.stringToObject(value) - case None => null - } + def get(key: String): Any = { + list(key)._2 } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala index 9c8bf577c7f..697da47334d 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala @@ -4,6 +4,7 @@ import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple +import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType import scala.collection.mutable import scala.collection.mutable.ListBuffer @@ -22,7 +23,7 @@ class HashJoinBuildOpExec[K](buildAttributeName: String) extends OperatorExecuto override def produceState(): State = { val state = State() - state.add("hashtable", buildTableHashMap) + state.add("hashtable", AttributeType.ANY, buildTableHashMap) state } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala index 37fdbde12e0..c57bd83d3c9 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala @@ -55,8 +55,6 @@ class HashJoinProbeOpExec[K]( if (port == 0) { Iterator.empty } else { - System.err.println(buildTableHashMap) - System.err.println(tuple) // Probe phase val key = tuple.getField(probeAttributeName).asInstanceOf[K] val (matchedTuples, joined) = diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala index 63313b0ee10..46b671200fb 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala @@ -4,6 +4,7 @@ import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple +import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType class TestAOpExec(limit: Int) extends OperatorExecutor { var count = 0 @@ -19,7 +20,7 @@ class TestAOpExec(limit: Int) extends OperatorExecutor { override def produceState(): State = { val state = State() - state.add("count", "test") + state.add("count", AttributeType.STRING, "test") state } } From 8cbb603ea4a964551d921a96830302966b8876c5 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sat, 3 Aug 2024 06:54:49 -0700 Subject: [PATCH 021/163] update --- .../amber/engine/common/ambermessage.proto | 2 +- .../src/main/python/core/models/payload.py | 6 ++- .../python/core/runnables/network_receiver.py | 7 +-- .../python/core/runnables/network_sender.py | 4 +- .../uci/ics/amber/engine/common/__init__.py | 44 +++++++++---------- .../pythonworker/PythonProxyClient.scala | 10 ++--- .../pythonworker/PythonProxyServer.scala | 5 +-- .../architecture/worker/DataProcessor.scala | 3 +- .../ics/texera/workflow/common/Marker.scala | 7 ++- .../ambermessage/AmbermessageProto.scala | 12 ++--- .../ambermessage/PythonDataHeader.scala | 42 +++++++++--------- 11 files changed, 71 insertions(+), 71 deletions(-) diff --git a/core/amber/src/main/protobuf/edu/uci/ics/amber/engine/common/ambermessage.proto b/core/amber/src/main/protobuf/edu/uci/ics/amber/engine/common/ambermessage.proto index 83d773e23ab..df275466e96 100644 --- a/core/amber/src/main/protobuf/edu/uci/ics/amber/engine/common/ambermessage.proto +++ b/core/amber/src/main/protobuf/edu/uci/ics/amber/engine/common/ambermessage.proto @@ -32,7 +32,7 @@ message ControlPayloadV2 { message PythonDataHeader { common.ActorVirtualIdentity tag = 1 [(scalapb.field).no_box = true]; - bool is_end = 2; + string marker = 2; } message PythonControlMessage { diff --git a/core/amber/src/main/python/core/models/payload.py b/core/amber/src/main/python/core/models/payload.py index 09332877dce..40b8f3e6eb7 100644 --- a/core/amber/src/main/python/core/models/payload.py +++ b/core/amber/src/main/python/core/models/payload.py @@ -24,4 +24,8 @@ class OutputDataFrame(DataPayload): @dataclass class EndOfUpstream(DataPayload): - pass + def __str__(self): + return "EndOfUpstream" + + def __eq__(self, other): + return str(self) == other diff --git a/core/amber/src/main/python/core/runnables/network_receiver.py b/core/amber/src/main/python/core/runnables/network_receiver.py index dc427248901..27639944412 100644 --- a/core/amber/src/main/python/core/runnables/network_receiver.py +++ b/core/amber/src/main/python/core/runnables/network_receiver.py @@ -62,14 +62,15 @@ def data_handler(command: bytes, table: Table) -> int: :return: sender credits """ data_header = PythonDataHeader().parse(command) - if not data_header.is_end: + if data_header.marker == EndOfUpstream(): shared_queue.put( - DataElement(tag=data_header.tag, payload=InputDataFrame(table)) + DataElement(tag=data_header.tag, payload=EndOfUpstream()) ) else: shared_queue.put( - DataElement(tag=data_header.tag, payload=EndOfUpstream()) + DataElement(tag=data_header.tag, payload=InputDataFrame(table)) ) + return shared_queue.in_mem_size() self._proxy_server.register_data_handler(data_handler) diff --git a/core/amber/src/main/python/core/runnables/network_sender.py b/core/amber/src/main/python/core/runnables/network_sender.py index eddaad89620..ea1f83946a5 100644 --- a/core/amber/src/main/python/core/runnables/network_sender.py +++ b/core/amber/src/main/python/core/runnables/network_sender.py @@ -61,11 +61,11 @@ def _send_data(self, to: ActorVirtualIdentity, data_payload: DataPayload) -> Non {name: [t[name] for t in data_payload.frame] for name in field_names}, schema=data_payload.schema.as_arrow_schema(), ) - data_header = PythonDataHeader(tag=to, is_end=False) + data_header = PythonDataHeader(tag=to, marker="data") self._proxy_client.send_data(bytes(data_header), table) # returns credits elif isinstance(data_payload, EndOfUpstream): - data_header = PythonDataHeader(tag=to, is_end=True) + data_header = PythonDataHeader(tag=to, marker=str(EndOfUpstream())) self._proxy_client.send_data(bytes(data_header), None) # returns credits else: diff --git a/core/amber/src/main/python/proto/edu/uci/ics/amber/engine/common/__init__.py b/core/amber/src/main/python/proto/edu/uci/ics/amber/engine/common/__init__.py index 62b90b1314e..b02d8f6c0e6 100644 --- a/core/amber/src/main/python/proto/edu/uci/ics/amber/engine/common/__init__.py +++ b/core/amber/src/main/python/proto/edu/uci/ics/amber/engine/common/__init__.py @@ -78,6 +78,27 @@ class PhysicalLink(betterproto.Message): to_port_id: "PortIdentity" = betterproto.message_field(4) +@dataclass(eq=False, repr=False) +class Backpressure(betterproto.Message): + enable_backpressure: bool = betterproto.bool_field(1) + + +@dataclass(eq=False, repr=False) +class CreditUpdate(betterproto.Message): + pass + + +@dataclass(eq=False, repr=False) +class ActorCommand(betterproto.Message): + backpressure: "Backpressure" = betterproto.message_field(1, group="sealed_value") + credit_update: "CreditUpdate" = betterproto.message_field(2, group="sealed_value") + + +@dataclass(eq=False, repr=False) +class PythonActorMessage(betterproto.Message): + payload: "ActorCommand" = betterproto.message_field(1) + + @dataclass(eq=False, repr=False) class ControlInvocationV2(betterproto.Message): command_id: int = betterproto.int64_field(1) @@ -105,31 +126,10 @@ class ControlPayloadV2(betterproto.Message): @dataclass(eq=False, repr=False) class PythonDataHeader(betterproto.Message): tag: "ActorVirtualIdentity" = betterproto.message_field(1) - is_end: bool = betterproto.bool_field(2) + marker: str = betterproto.string_field(2) @dataclass(eq=False, repr=False) class PythonControlMessage(betterproto.Message): tag: "ActorVirtualIdentity" = betterproto.message_field(1) payload: "ControlPayloadV2" = betterproto.message_field(2) - - -@dataclass(eq=False, repr=False) -class Backpressure(betterproto.Message): - enable_backpressure: bool = betterproto.bool_field(1) - - -@dataclass(eq=False, repr=False) -class CreditUpdate(betterproto.Message): - pass - - -@dataclass(eq=False, repr=False) -class ActorCommand(betterproto.Message): - backpressure: "Backpressure" = betterproto.message_field(1, group="sealed_value") - credit_update: "CreditUpdate" = betterproto.message_field(2, group="sealed_value") - - -@dataclass(eq=False, repr=False) -class PythonActorMessage(betterproto.Message): - payload: "ActorCommand" = betterproto.message_field(1) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala index ae1e9f305ff..5c2539ef193 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala @@ -105,11 +105,11 @@ class PythonProxyClient(portNumberPromise: Promise[Int], val actorId: ActorVirtu case DataFrame(frame) => val tuples: mutable.Queue[Tuple] = mutable.Queue(frame.map(_.asInstanceOf[Tuple]).toSeq: _*) - writeArrowStream(tuples, from, isEnd = false) + writeArrowStream(tuples, from, "data") case MarkerFrame(frame) => frame match { case EndOfUpstream() => - writeArrowStream(mutable.Queue(), from, isEnd = true) + writeArrowStream(mutable.Queue(), from, EndOfUpstream().toString) } } } @@ -163,13 +163,13 @@ class PythonProxyClient(portNumberPromise: Promise[Int], val actorId: ActorVirtu private def writeArrowStream( tuples: mutable.Queue[Tuple], from: ActorVirtualIdentity, - isEnd: Boolean + marker: String ): Unit = { val schema = if (tuples.isEmpty) new Schema() else tuples.front.getSchema - val descriptor = FlightDescriptor.command(PythonDataHeader(from, isEnd).toByteArray) + val descriptor = FlightDescriptor.command(PythonDataHeader(from, marker).toByteArray) logger.debug( - s"sending data with descriptor ${PythonDataHeader(from, isEnd)}, schema $schema, size of batch ${tuples.size}" + s"sending data with descriptor ${PythonDataHeader(from, marker)}, schema $schema, size of batch ${tuples.size}" ) val flightListener = new SyncPutListener val schemaRoot = VectorSchemaRoot.create(ArrowUtils.fromTexeraSchema(schema), allocator) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala index 419a9a1be66..bee3efc0d0e 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala @@ -84,8 +84,6 @@ private class AmberProducer( val dataHeader: PythonDataHeader = PythonDataHeader .parseFrom(flightStream.getDescriptor.getCommand) val to: ActorVirtualIdentity = dataHeader.tag - val isEnd: Boolean = dataHeader.isEnd - val root = flightStream.getRoot // send back ack with credits on ackStream @@ -105,8 +103,7 @@ private class AmberProducer( // closing the stream will release the dictionaries flightStream.takeDictionaryOwnership - if (isEnd) { - // EndOfUpstream + if (dataHeader.marker == EndOfUpstream().toString) { assert(root.getRowCount == 0) outputPort.sendTo(to, MarkerFrame(EndOfUpstream())) } else { diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index 0e55dfdaadc..b8e26b8c8a7 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -21,7 +21,7 @@ import edu.uci.ics.amber.engine.common.virtualidentity.util.{CONTROLLER, SELF} import edu.uci.ics.amber.engine.common.virtualidentity.{ActorVirtualIdentity, ChannelIdentity} import edu.uci.ics.amber.engine.common.workflow.PortIdentity import edu.uci.ics.amber.error.ErrorUtils.{mkConsoleMessage, safely} -import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfUpstream, State} +import edu.uci.ics.texera.workflow.common.{EndOfUpstream, State} import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple @@ -201,7 +201,6 @@ class DataProcessor( marker match { case state: State => processInputState(state, portId.id) - case StartOfUpstream() => case EndOfUpstream() => this.inputManager.getPort(portId).channels(channelId) = true if (inputManager.isPortCompleted(portId)) { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index b4ad512b0f4..f6312aefb00 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -6,9 +6,9 @@ import scala.collection.mutable sealed trait Marker -final case class StartOfUpstream() extends Marker - -final case class EndOfUpstream() extends Marker +final case class EndOfUpstream() extends Marker { + override def toString: String = "EndOfUpstream" +} final case class State() extends Marker { val list: mutable.Map[String, (AttributeType, Any)] = mutable.HashMap() @@ -21,4 +21,3 @@ final case class State() extends Marker { list(key)._2 } } - diff --git a/core/amber/src/main/scalapb/edu/uci/ics/amber/engine/common/ambermessage/AmbermessageProto.scala b/core/amber/src/main/scalapb/edu/uci/ics/amber/engine/common/ambermessage/AmbermessageProto.scala index 4ce6cfcc5b2..d1d6e07fd79 100644 --- a/core/amber/src/main/scalapb/edu/uci/ics/amber/engine/common/ambermessage/AmbermessageProto.scala +++ b/core/amber/src/main/scalapb/edu/uci/ics/amber/engine/common/ambermessage/AmbermessageProto.scala @@ -35,12 +35,12 @@ object AmbermessageProto extends _root_.scalapb.GeneratedFileObject { mVkdS51Y2kuaWNzLmFtYmVyLmVuZ2luZS5jb21tb24uQ29udHJvbEludm9jYXRpb25WMkIW4j8TEhFjb250cm9sSW52b2NhdGlvb kgAUhFjb250cm9sSW52b2NhdGlvbhJ5ChFyZXR1cm5faW52b2NhdGlvbhgCIAEoCzIzLmVkdS51Y2kuaWNzLmFtYmVyLmVuZ2luZ S5jb21tb24uUmV0dXJuSW52b2NhdGlvblYyQhXiPxISEHJldHVybkludm9jYXRpb25IAFIQcmV0dXJuSW52b2NhdGlvbkIOCgxzZ - WFsZWRfdmFsdWUiiwEKEFB5dGhvbkRhdGFIZWFkZXISVAoDdGFnGAEgASgLMjUuZWR1LnVjaS5pY3MuYW1iZXIuZW5naW5lLmNvb - W1vbi5BY3RvclZpcnR1YWxJZGVudGl0eUIL4j8IEgN0YWfwAQFSA3RhZxIhCgZpc19lbmQYAiABKAhCCuI/BxIFaXNFbmRSBWlzR - W5kIsoBChRQeXRob25Db250cm9sTWVzc2FnZRJUCgN0YWcYASABKAsyNS5lZHUudWNpLmljcy5hbWJlci5lbmdpbmUuY29tbW9uL - kFjdG9yVmlydHVhbElkZW50aXR5QgviPwgSA3RhZ/ABAVIDdGFnElwKB3BheWxvYWQYAiABKAsyMS5lZHUudWNpLmljcy5hbWJlc - i5lbmdpbmUuY29tbW9uLkNvbnRyb2xQYXlsb2FkVjJCD+I/DBIHcGF5bG9hZPABAVIHcGF5bG9hZEIJ4j8GSABYAHgBYgZwcm90b - zM=""" + WFsZWRfdmFsdWUijQEKEFB5dGhvbkRhdGFIZWFkZXISVAoDdGFnGAEgASgLMjUuZWR1LnVjaS5pY3MuYW1iZXIuZW5naW5lLmNvb + W1vbi5BY3RvclZpcnR1YWxJZGVudGl0eUIL4j8IEgN0YWfwAQFSA3RhZxIjCgZtYXJrZXIYAiABKAlCC+I/CBIGbWFya2VyUgZtY + XJrZXIiygEKFFB5dGhvbkNvbnRyb2xNZXNzYWdlElQKA3RhZxgBIAEoCzI1LmVkdS51Y2kuaWNzLmFtYmVyLmVuZ2luZS5jb21tb + 24uQWN0b3JWaXJ0dWFsSWRlbnRpdHlCC+I/CBIDdGFn8AEBUgN0YWcSXAoHcGF5bG9hZBgCIAEoCzIxLmVkdS51Y2kuaWNzLmFtY + mVyLmVuZ2luZS5jb21tb24uQ29udHJvbFBheWxvYWRWMkIP4j8MEgdwYXlsb2Fk8AEBUgdwYXlsb2FkQgniPwZIAFgAeAFiBnByb + 3RvMw==""" ).mkString) lazy val scalaDescriptor: _root_.scalapb.descriptors.FileDescriptor = { val scalaProto = com.google.protobuf.descriptor.FileDescriptorProto.parseFrom(ProtoBytes) diff --git a/core/amber/src/main/scalapb/edu/uci/ics/amber/engine/common/ambermessage/PythonDataHeader.scala b/core/amber/src/main/scalapb/edu/uci/ics/amber/engine/common/ambermessage/PythonDataHeader.scala index 40f91244a14..80f6a611f90 100644 --- a/core/amber/src/main/scalapb/edu/uci/ics/amber/engine/common/ambermessage/PythonDataHeader.scala +++ b/core/amber/src/main/scalapb/edu/uci/ics/amber/engine/common/ambermessage/PythonDataHeader.scala @@ -8,7 +8,7 @@ package edu.uci.ics.amber.engine.common.ambermessage @SerialVersionUID(0L) final case class PythonDataHeader( tag: edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity, - isEnd: _root_.scala.Boolean + marker: _root_.scala.Predef.String ) extends scalapb.GeneratedMessage with scalapb.lenses.Updatable[PythonDataHeader] { @transient private[this] var __serializedSizeCachedValue: _root_.scala.Int = 0 @@ -23,9 +23,9 @@ final case class PythonDataHeader( }; { - val __value = isEnd - if (__value != false) { - __size += _root_.com.google.protobuf.CodedOutputStream.computeBoolSize(2, __value) + val __value = marker + if (!__value.isEmpty) { + __size += _root_.com.google.protobuf.CodedOutputStream.computeStringSize(2, __value) } }; __size @@ -48,14 +48,14 @@ final case class PythonDataHeader( } }; { - val __v = isEnd - if (__v != false) { - _output__.writeBool(2, __v) + val __v = marker + if (!__v.isEmpty) { + _output__.writeString(2, __v) } }; } def withTag(__v: edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity): PythonDataHeader = copy(tag = __v) - def withIsEnd(__v: _root_.scala.Boolean): PythonDataHeader = copy(isEnd = __v) + def withMarker(__v: _root_.scala.Predef.String): PythonDataHeader = copy(marker = __v) def getFieldByNumber(__fieldNumber: _root_.scala.Int): _root_.scala.Any = { (__fieldNumber: @_root_.scala.unchecked) match { case 1 => { @@ -63,8 +63,8 @@ final case class PythonDataHeader( if (__t != edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity.defaultInstance) __t else null } case 2 => { - val __t = isEnd - if (__t != false) __t else null + val __t = marker + if (__t != "") __t else null } } } @@ -72,7 +72,7 @@ final case class PythonDataHeader( _root_.scala.Predef.require(__field.containingMessage eq companion.scalaDescriptor) (__field.number: @_root_.scala.unchecked) match { case 1 => tag.toPMessage - case 2 => _root_.scalapb.descriptors.PBoolean(isEnd) + case 2 => _root_.scalapb.descriptors.PString(marker) } } def toProtoString: _root_.scala.Predef.String = _root_.scalapb.TextFormat.printToSingleLineUnicodeString(this) @@ -84,7 +84,7 @@ object PythonDataHeader extends scalapb.GeneratedMessageCompanion[edu.uci.ics.am implicit def messageCompanion: scalapb.GeneratedMessageCompanion[edu.uci.ics.amber.engine.common.ambermessage.PythonDataHeader] = this def parseFrom(`_input__`: _root_.com.google.protobuf.CodedInputStream): edu.uci.ics.amber.engine.common.ambermessage.PythonDataHeader = { var __tag: _root_.scala.Option[edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity] = _root_.scala.None - var __isEnd: _root_.scala.Boolean = false + var __marker: _root_.scala.Predef.String = "" var _done__ = false while (!_done__) { val _tag__ = _input__.readTag() @@ -92,14 +92,14 @@ object PythonDataHeader extends scalapb.GeneratedMessageCompanion[edu.uci.ics.am case 0 => _done__ = true case 10 => __tag = _root_.scala.Some(__tag.fold(_root_.scalapb.LiteParser.readMessage[edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity](_input__))(_root_.scalapb.LiteParser.readMessage(_input__, _))) - case 16 => - __isEnd = _input__.readBool() + case 18 => + __marker = _input__.readStringRequireUtf8() case tag => _input__.skipField(tag) } } edu.uci.ics.amber.engine.common.ambermessage.PythonDataHeader( tag = __tag.getOrElse(edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity.defaultInstance), - isEnd = __isEnd + marker = __marker ) } implicit def messageReads: _root_.scalapb.descriptors.Reads[edu.uci.ics.amber.engine.common.ambermessage.PythonDataHeader] = _root_.scalapb.descriptors.Reads{ @@ -107,7 +107,7 @@ object PythonDataHeader extends scalapb.GeneratedMessageCompanion[edu.uci.ics.am _root_.scala.Predef.require(__fieldsMap.keys.forall(_.containingMessage eq scalaDescriptor), "FieldDescriptor does not match message type.") edu.uci.ics.amber.engine.common.ambermessage.PythonDataHeader( tag = __fieldsMap.get(scalaDescriptor.findFieldByNumber(1).get).map(_.as[edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity]).getOrElse(edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity.defaultInstance), - isEnd = __fieldsMap.get(scalaDescriptor.findFieldByNumber(2).get).map(_.as[_root_.scala.Boolean]).getOrElse(false) + marker = __fieldsMap.get(scalaDescriptor.findFieldByNumber(2).get).map(_.as[_root_.scala.Predef.String]).getOrElse("") ) case _ => throw new RuntimeException("Expected PMessage") } @@ -124,20 +124,20 @@ object PythonDataHeader extends scalapb.GeneratedMessageCompanion[edu.uci.ics.am def enumCompanionForFieldNumber(__fieldNumber: _root_.scala.Int): _root_.scalapb.GeneratedEnumCompanion[_] = throw new MatchError(__fieldNumber) lazy val defaultInstance = edu.uci.ics.amber.engine.common.ambermessage.PythonDataHeader( tag = edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity.defaultInstance, - isEnd = false + marker = "" ) implicit class PythonDataHeaderLens[UpperPB](_l: _root_.scalapb.lenses.Lens[UpperPB, edu.uci.ics.amber.engine.common.ambermessage.PythonDataHeader]) extends _root_.scalapb.lenses.ObjectLens[UpperPB, edu.uci.ics.amber.engine.common.ambermessage.PythonDataHeader](_l) { def tag: _root_.scalapb.lenses.Lens[UpperPB, edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity] = field(_.tag)((c_, f_) => c_.copy(tag = f_)) - def isEnd: _root_.scalapb.lenses.Lens[UpperPB, _root_.scala.Boolean] = field(_.isEnd)((c_, f_) => c_.copy(isEnd = f_)) + def marker: _root_.scalapb.lenses.Lens[UpperPB, _root_.scala.Predef.String] = field(_.marker)((c_, f_) => c_.copy(marker = f_)) } final val TAG_FIELD_NUMBER = 1 - final val IS_END_FIELD_NUMBER = 2 + final val MARKER_FIELD_NUMBER = 2 def of( tag: edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity, - isEnd: _root_.scala.Boolean + marker: _root_.scala.Predef.String ): _root_.edu.uci.ics.amber.engine.common.ambermessage.PythonDataHeader = _root_.edu.uci.ics.amber.engine.common.ambermessage.PythonDataHeader( tag, - isEnd + marker ) // @@protoc_insertion_point(GeneratedMessageCompanion[edu.uci.ics.amber.engine.common.PythonDataHeader]) } From 4d4babcd826fb6aae3cf1141304bd6d0fe3ceb7c Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sat, 3 Aug 2024 21:57:50 -0700 Subject: [PATCH 022/163] update --- .../uci/ics/texera/workflow/common/Marker.scala | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index f6312aefb00..2e827f30f56 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -1,14 +1,13 @@ package edu.uci.ics.texera.workflow.common -import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType +import edu.uci.ics.texera.workflow.common.tuple.Tuple +import edu.uci.ics.texera.workflow.common.tuple.schema.{AttributeType, Schema} import scala.collection.mutable sealed trait Marker -final case class EndOfUpstream() extends Marker { - override def toString: String = "EndOfUpstream" -} +final case class EndOfUpstream() extends Marker final case class State() extends Marker { val list: mutable.Map[String, (AttributeType, Any)] = mutable.HashMap() @@ -20,4 +19,14 @@ final case class State() extends Marker { def get(key: String): Any = { list(key)._2 } + + def toTuple: Tuple = { + val schemaBuilder = Schema.builder() + for ((name, (attributeType, _)) <- list) { + schemaBuilder.add(name, attributeType) + } + val tupleBuilder = Tuple.builder(schemaBuilder.build()) + tupleBuilder.addSequentially(list.values.map(_._2).toArray) + tupleBuilder.build() + } } From bcad4bf999ea5f0400c801af3d2fc0c865e00774 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sat, 3 Aug 2024 23:23:10 -0700 Subject: [PATCH 023/163] update --- .../src/main/python/core/models/payload.py | 2 ++ .../python/core/runnables/network_receiver.py | 18 ++++++------------ .../python/core/runnables/network_sender.py | 2 +- .../pythonworker/PythonProxyClient.scala | 4 ++-- .../pythonworker/PythonProxyServer.scala | 2 +- 5 files changed, 12 insertions(+), 16 deletions(-) diff --git a/core/amber/src/main/python/core/models/payload.py b/core/amber/src/main/python/core/models/payload.py index 40b8f3e6eb7..1b085332fbc 100644 --- a/core/amber/src/main/python/core/models/payload.py +++ b/core/amber/src/main/python/core/models/payload.py @@ -24,6 +24,8 @@ class OutputDataFrame(DataPayload): @dataclass class EndOfUpstream(DataPayload): + frame: Optional[Table] = None + def __str__(self): return "EndOfUpstream" diff --git a/core/amber/src/main/python/core/runnables/network_receiver.py b/core/amber/src/main/python/core/runnables/network_receiver.py index 27639944412..79e63459ecf 100644 --- a/core/amber/src/main/python/core/runnables/network_receiver.py +++ b/core/amber/src/main/python/core/runnables/network_receiver.py @@ -12,10 +12,7 @@ from core.architecture.handlers.actorcommand.credit_update_handler import ( CreditUpdateHandler, ) -from core.models import ( - InputDataFrame, - EndOfUpstream, -) +import core.models.payload from core.models.internal_queue import DataElement, ControlElement, InternalQueue from core.proxy import ProxyServer from core.util import Stoppable, get_one_of @@ -62,14 +59,11 @@ def data_handler(command: bytes, table: Table) -> int: :return: sender credits """ data_header = PythonDataHeader().parse(command) - if data_header.marker == EndOfUpstream(): - shared_queue.put( - DataElement(tag=data_header.tag, payload=EndOfUpstream()) - ) - else: - shared_queue.put( - DataElement(tag=data_header.tag, payload=InputDataFrame(table)) - ) + shared_queue.put( + DataElement( + tag=data_header.tag, + payload=getattr(core.models.payload, data_header.marker)(table)) + ) return shared_queue.in_mem_size() diff --git a/core/amber/src/main/python/core/runnables/network_sender.py b/core/amber/src/main/python/core/runnables/network_sender.py index ea1f83946a5..cedf4dac333 100644 --- a/core/amber/src/main/python/core/runnables/network_sender.py +++ b/core/amber/src/main/python/core/runnables/network_sender.py @@ -61,7 +61,7 @@ def _send_data(self, to: ActorVirtualIdentity, data_payload: DataPayload) -> Non {name: [t[name] for t in data_payload.frame] for name in field_names}, schema=data_payload.schema.as_arrow_schema(), ) - data_header = PythonDataHeader(tag=to, marker="data") + data_header = PythonDataHeader(tag=to, marker="InputDataFrame") self._proxy_client.send_data(bytes(data_header), table) # returns credits elif isinstance(data_payload, EndOfUpstream): diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala index 5c2539ef193..0f690d4492a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala @@ -105,11 +105,11 @@ class PythonProxyClient(portNumberPromise: Promise[Int], val actorId: ActorVirtu case DataFrame(frame) => val tuples: mutable.Queue[Tuple] = mutable.Queue(frame.map(_.asInstanceOf[Tuple]).toSeq: _*) - writeArrowStream(tuples, from, "data") + writeArrowStream(tuples, from, "InputDataFrame") case MarkerFrame(frame) => frame match { case EndOfUpstream() => - writeArrowStream(mutable.Queue(), from, EndOfUpstream().toString) + writeArrowStream(mutable.Queue(), from, EndOfUpstream().getClass.getSimpleName) } } } diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala index bee3efc0d0e..dd2961cefae 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala @@ -103,7 +103,7 @@ private class AmberProducer( // closing the stream will release the dictionaries flightStream.takeDictionaryOwnership - if (dataHeader.marker == EndOfUpstream().toString) { + if (dataHeader.marker == EndOfUpstream().getClass.getSimpleName) { assert(root.getRowCount == 0) outputPort.sendTo(to, MarkerFrame(EndOfUpstream())) } else { From 98e98ebaff047580e25da65fdd5ebbc3001c9b98 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sat, 3 Aug 2024 23:48:07 -0700 Subject: [PATCH 024/163] update --- .../messaginglayer/OutputManager.scala | 2 +- .../pythonworker/PythonProxyClient.scala | 3 +- .../pythonworker/PythonProxyServer.scala | 5 +-- .../partitioners/Partitioner.scala | 4 +-- .../architecture/worker/DataProcessor.scala | 1 - .../worker/promisehandlers/StartHandler.scala | 3 +- .../common/ambermessage/DataPayload.scala | 34 +++++++++++++++++-- .../ics/texera/workflow/common/Marker.scala | 32 ----------------- .../common/operators/OperatorExecutor.scala | 2 +- .../hashJoin/HashJoinBuildOpExec.scala | 2 +- .../hashJoin/HashJoinProbeOpExec.scala | 3 +- .../workflow/operators/test/TestAOpExec.scala | 2 +- .../workflow/operators/test/TestBOpExec.scala | 2 +- 13 files changed, 42 insertions(+), 53 deletions(-) delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManager.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManager.scala index 73320bd1536..883d8dbcc39 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManager.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManager.scala @@ -5,11 +5,11 @@ import edu.uci.ics.amber.engine.architecture.sendsemantics.partitioners._ import edu.uci.ics.amber.engine.architecture.sendsemantics.partitionings._ import edu.uci.ics.amber.engine.architecture.worker.DataProcessor.{FinalizeExecutor, FinalizePort} import edu.uci.ics.amber.engine.common.AmberLogging +import edu.uci.ics.amber.engine.common.ambermessage.Marker import edu.uci.ics.amber.engine.common.rpc.AsyncRPCServer.ControlCommand import edu.uci.ics.amber.engine.common.tuple.amber.{SchemaEnforceable, TupleLike} import edu.uci.ics.amber.engine.common.virtualidentity.{ActorVirtualIdentity, ChannelIdentity} import edu.uci.ics.amber.engine.common.workflow.{PhysicalLink, PortIdentity} -import edu.uci.ics.texera.workflow.common.Marker import edu.uci.ics.texera.workflow.common.tuple.schema.Schema import scala.collection.mutable diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala index 0f690d4492a..692afd54245 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala @@ -17,7 +17,6 @@ import edu.uci.ics.amber.engine.common.ambermessage.InvocationConvertUtils.{ import edu.uci.ics.amber.engine.common.ambermessage.{PythonControlMessage, _} import edu.uci.ics.amber.engine.common.rpc.AsyncRPCClient.{ControlInvocation, ReturnInvocation} import edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity -import edu.uci.ics.texera.workflow.common.EndOfUpstream import edu.uci.ics.texera.workflow.common.tuple.Tuple import edu.uci.ics.texera.workflow.common.tuple.schema.Schema import org.apache.arrow.flight._ @@ -109,7 +108,7 @@ class PythonProxyClient(portNumberPromise: Promise[Int], val actorId: ActorVirtu case MarkerFrame(frame) => frame match { case EndOfUpstream() => - writeArrowStream(mutable.Queue(), from, EndOfUpstream().getClass.getSimpleName) + writeArrowStream(mutable.Queue(), from, frame.getClass.getSimpleName) } } } diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala index dd2961cefae..a4324c1074e 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala @@ -13,15 +13,12 @@ import edu.uci.ics.texera.workflow.common.tuple.Tuple import org.apache.arrow.flight._ import org.apache.arrow.memory.{ArrowBuf, BufferAllocator, RootAllocator} import org.apache.arrow.util.AutoCloseables - import java.nio.{ByteBuffer, ByteOrder} import java.io.IOException import java.net.ServerSocket import java.util.concurrent.atomic.AtomicInteger import scala.collection.mutable import com.twitter.util.Promise -import edu.uci.ics.texera.workflow.common.EndOfUpstream - import java.nio.charset.Charset private class AmberProducer( @@ -130,7 +127,7 @@ class PythonProxyServer( def getPortNumber: AtomicInteger = portNumber val allocator: BufferAllocator = - new RootAllocator().newChildAllocator("flight-server", 0, Long.MaxValue); + new RootAllocator().newChildAllocator("flight-server", 0, Long.MaxValue) val producer: FlightProducer = new AmberProducer(actorId, outputPort, promise) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala index a263f79c9fa..25e378e7cb5 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala @@ -2,11 +2,9 @@ package edu.uci.ics.amber.engine.architecture.sendsemantics.partitioners import edu.uci.ics.amber.engine.architecture.messaginglayer.NetworkOutputGateway import edu.uci.ics.amber.engine.common.AmberConfig -import edu.uci.ics.amber.engine.common.ambermessage.{DataFrame, MarkerFrame} +import edu.uci.ics.amber.engine.common.ambermessage.{DataFrame, EndOfUpstream, Marker, MarkerFrame} import edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity -import edu.uci.ics.texera.workflow.common.{EndOfUpstream, Marker} import edu.uci.ics.texera.workflow.common.tuple.Tuple - import scala.collection.mutable.ArrayBuffer trait Partitioner extends Serializable { diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index b8e26b8c8a7..4cb5f624d24 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -21,7 +21,6 @@ import edu.uci.ics.amber.engine.common.virtualidentity.util.{CONTROLLER, SELF} import edu.uci.ics.amber.engine.common.virtualidentity.{ActorVirtualIdentity, ChannelIdentity} import edu.uci.ics.amber.engine.common.workflow.PortIdentity import edu.uci.ics.amber.error.ErrorUtils.{mkConsoleMessage, safely} -import edu.uci.ics.texera.workflow.common.{EndOfUpstream, State} import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala index 9a5298040ee..5e6ac53904e 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala @@ -6,12 +6,11 @@ import edu.uci.ics.amber.engine.architecture.worker.statistics.WorkerState import edu.uci.ics.amber.engine.architecture.worker.statistics.WorkerState.{READY, RUNNING} import edu.uci.ics.amber.engine.common.SourceOperatorExecutor import edu.uci.ics.amber.engine.common.amberexception.WorkflowRuntimeException -import edu.uci.ics.amber.engine.common.ambermessage.MarkerFrame +import edu.uci.ics.amber.engine.common.ambermessage.{EndOfUpstream, MarkerFrame} import edu.uci.ics.amber.engine.common.rpc.AsyncRPCServer.ControlCommand import edu.uci.ics.amber.engine.common.virtualidentity.ChannelIdentity import edu.uci.ics.amber.engine.common.virtualidentity.util.SOURCE_STARTER_ACTOR import edu.uci.ics.amber.engine.common.workflow.PortIdentity -import edu.uci.ics.texera.workflow.common.EndOfUpstream object StartHandler { final case class StartWorker() extends ControlCommand[WorkerState] diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/ambermessage/DataPayload.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/ambermessage/DataPayload.scala index 64f0e6a80bd..316684b716c 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/ambermessage/DataPayload.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/ambermessage/DataPayload.scala @@ -1,11 +1,11 @@ package edu.uci.ics.amber.engine.common.ambermessage -import edu.uci.ics.texera.workflow.common.Marker import edu.uci.ics.texera.workflow.common.tuple.Tuple +import edu.uci.ics.texera.workflow.common.tuple.schema.{AttributeType, Schema} -sealed trait DataPayload extends WorkflowFIFOMessagePayload {} +import scala.collection.mutable -final case class MarkerFrame(frame: Marker) extends DataPayload +sealed trait DataPayload extends WorkflowFIFOMessagePayload {} final case class DataFrame(frame: Array[Tuple]) extends DataPayload { val inMemSize: Long = { @@ -29,3 +29,31 @@ final case class DataFrame(frame: Array[Tuple]) extends DataPayload { true } } + +final case class MarkerFrame(frame: Marker) extends DataPayload + +sealed trait Marker + +final case class EndOfUpstream() extends Marker + +final case class State() extends Marker { + val list: mutable.Map[String, (AttributeType, Any)] = mutable.HashMap() + + def add(attributeName: String, attributeType: AttributeType, field: Any): Unit = { + list.put(attributeName, (attributeType, field)) + } + + def get(key: String): Any = { + list(key)._2 + } + + def toTuple: Tuple = { + val schemaBuilder = Schema.builder() + for ((name, (attributeType, _)) <- list) { + schemaBuilder.add(name, attributeType) + } + val tupleBuilder = Tuple.builder(schemaBuilder.build()) + tupleBuilder.addSequentially(list.values.map(_._2).toArray) + tupleBuilder.build() + } +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala deleted file mode 100644 index 2e827f30f56..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ /dev/null @@ -1,32 +0,0 @@ -package edu.uci.ics.texera.workflow.common - -import edu.uci.ics.texera.workflow.common.tuple.Tuple -import edu.uci.ics.texera.workflow.common.tuple.schema.{AttributeType, Schema} - -import scala.collection.mutable - -sealed trait Marker - -final case class EndOfUpstream() extends Marker - -final case class State() extends Marker { - val list: mutable.Map[String, (AttributeType, Any)] = mutable.HashMap() - - def add(attributeName: String, attributeType: AttributeType, field: Any): Unit = { - list.put(attributeName, (attributeType, field)) - } - - def get(key: String): Any = { - list(key)._2 - } - - def toTuple: Tuple = { - val schemaBuilder = Schema.builder() - for ((name, (attributeType, _)) <- list) { - schemaBuilder.add(name, attributeType) - } - val tupleBuilder = Tuple.builder(schemaBuilder.build()) - tupleBuilder.addSequentially(list.values.map(_._2).toArray) - tupleBuilder.build() - } -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala index 769eed2dc15..5beb296703f 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala @@ -1,8 +1,8 @@ package edu.uci.ics.texera.workflow.common.operators +import edu.uci.ics.amber.engine.common.ambermessage.State import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike import edu.uci.ics.amber.engine.common.workflow.PortIdentity -import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.tuple.Tuple trait OperatorExecutor { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala index 697da47334d..ca809ea35ad 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala @@ -1,7 +1,7 @@ package edu.uci.ics.texera.workflow.operators.hashJoin +import edu.uci.ics.amber.engine.common.ambermessage.State import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike -import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala index c57bd83d3c9..35a93abf246 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala @@ -1,9 +1,10 @@ package edu.uci.ics.texera.workflow.operators.hashJoin +import edu.uci.ics.amber.engine.common.ambermessage.State import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike -import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple + import scala.collection.mutable import scala.collection.mutable.ListBuffer diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala index 46b671200fb..18cee128004 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala @@ -1,7 +1,7 @@ package edu.uci.ics.texera.workflow.operators.test +import edu.uci.ics.amber.engine.common.ambermessage.State import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike -import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala index 625e5b757a5..55f9101e842 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala @@ -1,7 +1,7 @@ package edu.uci.ics.texera.workflow.operators.test +import edu.uci.ics.amber.engine.common.ambermessage.State import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike -import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple From 7286df54f4aa6a8ff80e342ed3718a22f06a09ae Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sat, 3 Aug 2024 23:50:32 -0700 Subject: [PATCH 025/163] Revert "update" This reverts commit 98e98ebaff047580e25da65fdd5ebbc3001c9b98. --- .../messaginglayer/OutputManager.scala | 2 +- .../pythonworker/PythonProxyClient.scala | 3 +- .../pythonworker/PythonProxyServer.scala | 5 ++- .../partitioners/Partitioner.scala | 4 ++- .../architecture/worker/DataProcessor.scala | 1 + .../worker/promisehandlers/StartHandler.scala | 3 +- .../common/ambermessage/DataPayload.scala | 34 ++----------------- .../ics/texera/workflow/common/Marker.scala | 32 +++++++++++++++++ .../common/operators/OperatorExecutor.scala | 2 +- .../hashJoin/HashJoinBuildOpExec.scala | 2 +- .../hashJoin/HashJoinProbeOpExec.scala | 3 +- .../workflow/operators/test/TestAOpExec.scala | 2 +- .../workflow/operators/test/TestBOpExec.scala | 2 +- 13 files changed, 53 insertions(+), 42 deletions(-) create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManager.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManager.scala index 883d8dbcc39..73320bd1536 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManager.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManager.scala @@ -5,11 +5,11 @@ import edu.uci.ics.amber.engine.architecture.sendsemantics.partitioners._ import edu.uci.ics.amber.engine.architecture.sendsemantics.partitionings._ import edu.uci.ics.amber.engine.architecture.worker.DataProcessor.{FinalizeExecutor, FinalizePort} import edu.uci.ics.amber.engine.common.AmberLogging -import edu.uci.ics.amber.engine.common.ambermessage.Marker import edu.uci.ics.amber.engine.common.rpc.AsyncRPCServer.ControlCommand import edu.uci.ics.amber.engine.common.tuple.amber.{SchemaEnforceable, TupleLike} import edu.uci.ics.amber.engine.common.virtualidentity.{ActorVirtualIdentity, ChannelIdentity} import edu.uci.ics.amber.engine.common.workflow.{PhysicalLink, PortIdentity} +import edu.uci.ics.texera.workflow.common.Marker import edu.uci.ics.texera.workflow.common.tuple.schema.Schema import scala.collection.mutable diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala index 692afd54245..0f690d4492a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala @@ -17,6 +17,7 @@ import edu.uci.ics.amber.engine.common.ambermessage.InvocationConvertUtils.{ import edu.uci.ics.amber.engine.common.ambermessage.{PythonControlMessage, _} import edu.uci.ics.amber.engine.common.rpc.AsyncRPCClient.{ControlInvocation, ReturnInvocation} import edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity +import edu.uci.ics.texera.workflow.common.EndOfUpstream import edu.uci.ics.texera.workflow.common.tuple.Tuple import edu.uci.ics.texera.workflow.common.tuple.schema.Schema import org.apache.arrow.flight._ @@ -108,7 +109,7 @@ class PythonProxyClient(portNumberPromise: Promise[Int], val actorId: ActorVirtu case MarkerFrame(frame) => frame match { case EndOfUpstream() => - writeArrowStream(mutable.Queue(), from, frame.getClass.getSimpleName) + writeArrowStream(mutable.Queue(), from, EndOfUpstream().getClass.getSimpleName) } } } diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala index a4324c1074e..dd2961cefae 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala @@ -13,12 +13,15 @@ import edu.uci.ics.texera.workflow.common.tuple.Tuple import org.apache.arrow.flight._ import org.apache.arrow.memory.{ArrowBuf, BufferAllocator, RootAllocator} import org.apache.arrow.util.AutoCloseables + import java.nio.{ByteBuffer, ByteOrder} import java.io.IOException import java.net.ServerSocket import java.util.concurrent.atomic.AtomicInteger import scala.collection.mutable import com.twitter.util.Promise +import edu.uci.ics.texera.workflow.common.EndOfUpstream + import java.nio.charset.Charset private class AmberProducer( @@ -127,7 +130,7 @@ class PythonProxyServer( def getPortNumber: AtomicInteger = portNumber val allocator: BufferAllocator = - new RootAllocator().newChildAllocator("flight-server", 0, Long.MaxValue) + new RootAllocator().newChildAllocator("flight-server", 0, Long.MaxValue); val producer: FlightProducer = new AmberProducer(actorId, outputPort, promise) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala index 25e378e7cb5..a263f79c9fa 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala @@ -2,9 +2,11 @@ package edu.uci.ics.amber.engine.architecture.sendsemantics.partitioners import edu.uci.ics.amber.engine.architecture.messaginglayer.NetworkOutputGateway import edu.uci.ics.amber.engine.common.AmberConfig -import edu.uci.ics.amber.engine.common.ambermessage.{DataFrame, EndOfUpstream, Marker, MarkerFrame} +import edu.uci.ics.amber.engine.common.ambermessage.{DataFrame, MarkerFrame} import edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity +import edu.uci.ics.texera.workflow.common.{EndOfUpstream, Marker} import edu.uci.ics.texera.workflow.common.tuple.Tuple + import scala.collection.mutable.ArrayBuffer trait Partitioner extends Serializable { diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index 4cb5f624d24..b8e26b8c8a7 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -21,6 +21,7 @@ import edu.uci.ics.amber.engine.common.virtualidentity.util.{CONTROLLER, SELF} import edu.uci.ics.amber.engine.common.virtualidentity.{ActorVirtualIdentity, ChannelIdentity} import edu.uci.ics.amber.engine.common.workflow.PortIdentity import edu.uci.ics.amber.error.ErrorUtils.{mkConsoleMessage, safely} +import edu.uci.ics.texera.workflow.common.{EndOfUpstream, State} import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala index 5e6ac53904e..9a5298040ee 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala @@ -6,11 +6,12 @@ import edu.uci.ics.amber.engine.architecture.worker.statistics.WorkerState import edu.uci.ics.amber.engine.architecture.worker.statistics.WorkerState.{READY, RUNNING} import edu.uci.ics.amber.engine.common.SourceOperatorExecutor import edu.uci.ics.amber.engine.common.amberexception.WorkflowRuntimeException -import edu.uci.ics.amber.engine.common.ambermessage.{EndOfUpstream, MarkerFrame} +import edu.uci.ics.amber.engine.common.ambermessage.MarkerFrame import edu.uci.ics.amber.engine.common.rpc.AsyncRPCServer.ControlCommand import edu.uci.ics.amber.engine.common.virtualidentity.ChannelIdentity import edu.uci.ics.amber.engine.common.virtualidentity.util.SOURCE_STARTER_ACTOR import edu.uci.ics.amber.engine.common.workflow.PortIdentity +import edu.uci.ics.texera.workflow.common.EndOfUpstream object StartHandler { final case class StartWorker() extends ControlCommand[WorkerState] diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/ambermessage/DataPayload.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/ambermessage/DataPayload.scala index 316684b716c..64f0e6a80bd 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/ambermessage/DataPayload.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/ambermessage/DataPayload.scala @@ -1,12 +1,12 @@ package edu.uci.ics.amber.engine.common.ambermessage +import edu.uci.ics.texera.workflow.common.Marker import edu.uci.ics.texera.workflow.common.tuple.Tuple -import edu.uci.ics.texera.workflow.common.tuple.schema.{AttributeType, Schema} - -import scala.collection.mutable sealed trait DataPayload extends WorkflowFIFOMessagePayload {} +final case class MarkerFrame(frame: Marker) extends DataPayload + final case class DataFrame(frame: Array[Tuple]) extends DataPayload { val inMemSize: Long = { frame.map(_.inMemSize).sum @@ -29,31 +29,3 @@ final case class DataFrame(frame: Array[Tuple]) extends DataPayload { true } } - -final case class MarkerFrame(frame: Marker) extends DataPayload - -sealed trait Marker - -final case class EndOfUpstream() extends Marker - -final case class State() extends Marker { - val list: mutable.Map[String, (AttributeType, Any)] = mutable.HashMap() - - def add(attributeName: String, attributeType: AttributeType, field: Any): Unit = { - list.put(attributeName, (attributeType, field)) - } - - def get(key: String): Any = { - list(key)._2 - } - - def toTuple: Tuple = { - val schemaBuilder = Schema.builder() - for ((name, (attributeType, _)) <- list) { - schemaBuilder.add(name, attributeType) - } - val tupleBuilder = Tuple.builder(schemaBuilder.build()) - tupleBuilder.addSequentially(list.values.map(_._2).toArray) - tupleBuilder.build() - } -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala new file mode 100644 index 00000000000..2e827f30f56 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -0,0 +1,32 @@ +package edu.uci.ics.texera.workflow.common + +import edu.uci.ics.texera.workflow.common.tuple.Tuple +import edu.uci.ics.texera.workflow.common.tuple.schema.{AttributeType, Schema} + +import scala.collection.mutable + +sealed trait Marker + +final case class EndOfUpstream() extends Marker + +final case class State() extends Marker { + val list: mutable.Map[String, (AttributeType, Any)] = mutable.HashMap() + + def add(attributeName: String, attributeType: AttributeType, field: Any): Unit = { + list.put(attributeName, (attributeType, field)) + } + + def get(key: String): Any = { + list(key)._2 + } + + def toTuple: Tuple = { + val schemaBuilder = Schema.builder() + for ((name, (attributeType, _)) <- list) { + schemaBuilder.add(name, attributeType) + } + val tupleBuilder = Tuple.builder(schemaBuilder.build()) + tupleBuilder.addSequentially(list.values.map(_._2).toArray) + tupleBuilder.build() + } +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala index 5beb296703f..769eed2dc15 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala @@ -1,8 +1,8 @@ package edu.uci.ics.texera.workflow.common.operators -import edu.uci.ics.amber.engine.common.ambermessage.State import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike import edu.uci.ics.amber.engine.common.workflow.PortIdentity +import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.tuple.Tuple trait OperatorExecutor { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala index ca809ea35ad..697da47334d 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala @@ -1,7 +1,7 @@ package edu.uci.ics.texera.workflow.operators.hashJoin -import edu.uci.ics.amber.engine.common.ambermessage.State import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike +import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala index 35a93abf246..c57bd83d3c9 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala @@ -1,10 +1,9 @@ package edu.uci.ics.texera.workflow.operators.hashJoin -import edu.uci.ics.amber.engine.common.ambermessage.State import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike +import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple - import scala.collection.mutable import scala.collection.mutable.ListBuffer diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala index 18cee128004..46b671200fb 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala @@ -1,7 +1,7 @@ package edu.uci.ics.texera.workflow.operators.test -import edu.uci.ics.amber.engine.common.ambermessage.State import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike +import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala index 55f9101e842..625e5b757a5 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala @@ -1,7 +1,7 @@ package edu.uci.ics.texera.workflow.operators.test -import edu.uci.ics.amber.engine.common.ambermessage.State import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike +import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple From b617d62a72c074b4b4d42b9533cb0e6ebc6489cb Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sun, 4 Aug 2024 00:16:43 -0700 Subject: [PATCH 026/163] update --- .../pythonworker/PythonProxyClient.scala | 16 ++++++++-------- .../uci/ics/texera/workflow/common/Marker.scala | 1 - 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala index 0f690d4492a..d7d624a1bb7 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala @@ -17,7 +17,7 @@ import edu.uci.ics.amber.engine.common.ambermessage.InvocationConvertUtils.{ import edu.uci.ics.amber.engine.common.ambermessage.{PythonControlMessage, _} import edu.uci.ics.amber.engine.common.rpc.AsyncRPCClient.{ControlInvocation, ReturnInvocation} import edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity -import edu.uci.ics.texera.workflow.common.EndOfUpstream +import edu.uci.ics.texera.workflow.common.{EndOfUpstream, State} import edu.uci.ics.texera.workflow.common.tuple.Tuple import edu.uci.ics.texera.workflow.common.tuple.schema.Schema import org.apache.arrow.flight._ @@ -103,14 +103,14 @@ class PythonProxyClient(portNumberPromise: Promise[Int], val actorId: ActorVirtu def sendData(dataPayload: DataPayload, from: ActorVirtualIdentity): Unit = { dataPayload match { case DataFrame(frame) => - val tuples: mutable.Queue[Tuple] = - mutable.Queue(frame.map(_.asInstanceOf[Tuple]).toSeq: _*) - writeArrowStream(tuples, from, "InputDataFrame") - case MarkerFrame(frame) => - frame match { - case EndOfUpstream() => - writeArrowStream(mutable.Queue(), from, EndOfUpstream().getClass.getSimpleName) + val queue = mutable.Queue(frame.map(_.asInstanceOf[Tuple]): _*) + writeArrowStream(queue, from, "InputDataFrame") + case MarkerFrame(marker) => + val queue = marker match { + case state: State => mutable.Queue(state.toTuple) + case _ => mutable.Queue.empty[Tuple] } + writeArrowStream(queue, from, marker.getClass.getSimpleName) } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index 2e827f30f56..46bbf3a3492 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -2,7 +2,6 @@ package edu.uci.ics.texera.workflow.common import edu.uci.ics.texera.workflow.common.tuple.Tuple import edu.uci.ics.texera.workflow.common.tuple.schema.{AttributeType, Schema} - import scala.collection.mutable sealed trait Marker From 812706f83e10aef4ba44181e31487baf8ed35278 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sun, 4 Aug 2024 00:22:16 -0700 Subject: [PATCH 027/163] update --- .../engine/architecture/pythonworker/PythonProxyClient.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala index d7d624a1bb7..f4d6c7aa3d4 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala @@ -103,7 +103,7 @@ class PythonProxyClient(portNumberPromise: Promise[Int], val actorId: ActorVirtu def sendData(dataPayload: DataPayload, from: ActorVirtualIdentity): Unit = { dataPayload match { case DataFrame(frame) => - val queue = mutable.Queue(frame.map(_.asInstanceOf[Tuple]): _*) + val queue = mutable.Queue(frame: _*) writeArrowStream(queue, from, "InputDataFrame") case MarkerFrame(marker) => val queue = marker match { From e6e93a660a06d552bbaa5778383b8366a7034fd5 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sun, 4 Aug 2024 02:18:50 -0700 Subject: [PATCH 028/163] update --- .../src/main/python/core/models/payload.py | 6 ------ .../python/core/runnables/network_sender.py | 6 +++--- .../ics/texera/workflow/common/Marker.scala | 20 +++++++++---------- 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/core/amber/src/main/python/core/models/payload.py b/core/amber/src/main/python/core/models/payload.py index 1b085332fbc..58450e72413 100644 --- a/core/amber/src/main/python/core/models/payload.py +++ b/core/amber/src/main/python/core/models/payload.py @@ -25,9 +25,3 @@ class OutputDataFrame(DataPayload): @dataclass class EndOfUpstream(DataPayload): frame: Optional[Table] = None - - def __str__(self): - return "EndOfUpstream" - - def __eq__(self, other): - return str(self) == other diff --git a/core/amber/src/main/python/core/runnables/network_sender.py b/core/amber/src/main/python/core/runnables/network_sender.py index cedf4dac333..483b8f83a29 100644 --- a/core/amber/src/main/python/core/runnables/network_sender.py +++ b/core/amber/src/main/python/core/runnables/network_sender.py @@ -4,7 +4,7 @@ from overrides import overrides from pyarrow import Table -from core.models import OutputDataFrame, DataPayload, EndOfUpstream, InternalQueue +from core.models import OutputDataFrame, DataPayload, EndOfUpstream, InternalQueue, InputDataFrame from core.models.internal_queue import InternalQueueElement, DataElement, ControlElement from core.proxy import ProxyClient from core.util import StoppableQueueBlockingRunnable @@ -61,11 +61,11 @@ def _send_data(self, to: ActorVirtualIdentity, data_payload: DataPayload) -> Non {name: [t[name] for t in data_payload.frame] for name in field_names}, schema=data_payload.schema.as_arrow_schema(), ) - data_header = PythonDataHeader(tag=to, marker="InputDataFrame") + data_header = PythonDataHeader(tag=to, marker=InputDataFrame.__name__) self._proxy_client.send_data(bytes(data_header), table) # returns credits elif isinstance(data_payload, EndOfUpstream): - data_header = PythonDataHeader(tag=to, marker=str(EndOfUpstream())) + data_header = PythonDataHeader(tag=to, marker=EndOfUpstream.__name__) self._proxy_client.send_data(bytes(data_header), None) # returns credits else: diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index 46bbf3a3492..0461c16f482 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -1,7 +1,8 @@ package edu.uci.ics.texera.workflow.common import edu.uci.ics.texera.workflow.common.tuple.Tuple -import edu.uci.ics.texera.workflow.common.tuple.schema.{AttributeType, Schema} +import edu.uci.ics.texera.workflow.common.tuple.schema.{AttributeType, Schema, Attribute} + import scala.collection.mutable sealed trait Marker @@ -19,13 +20,12 @@ final case class State() extends Marker { list(key)._2 } - def toTuple: Tuple = { - val schemaBuilder = Schema.builder() - for ((name, (attributeType, _)) <- list) { - schemaBuilder.add(name, attributeType) - } - val tupleBuilder = Tuple.builder(schemaBuilder.build()) - tupleBuilder.addSequentially(list.values.map(_._2).toArray) - tupleBuilder.build() - } + def toTuple: Tuple = + Tuple.builder( + Schema.builder() + .add(list.map { case (name, (attrType, _)) => + new Attribute(name, attrType)}) + .build()) + .addSequentially(list.values.map(_._2).toArray) + .build() } From cdf0d88588bc0bd87f86d3cee2634724123379b3 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sun, 4 Aug 2024 05:01:29 -0700 Subject: [PATCH 029/163] update --- .../architecture/packaging/input_manager.py | 7 +++-- .../src/main/python/core/models/payload.py | 6 ++++ .../src/main/python/core/models/state.py | 9 ++++++ .../pythonworker/PythonProxyClient.scala | 11 +++---- .../ics/texera/workflow/common/Marker.scala | 6 ++-- .../hashJoin/HashJoinProbeOpExec.scala | 2 +- .../workflow/operators/test/TestAOpDesc.scala | 29 ++----------------- .../workflow/operators/test/TestAOpExec.scala | 8 +---- .../workflow/operators/test/TestBOpExec.scala | 2 +- 9 files changed, 32 insertions(+), 48 deletions(-) create mode 100644 core/amber/src/main/python/core/models/state.py diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index 6a5356f4710..1eb98bacd27 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -2,7 +2,7 @@ from core.models import Tuple, ArrowTableTupleProvider, Schema from core.models.marker import EndOfAllMarker, Marker, SenderChangeMarker -from core.models.payload import InputDataFrame, DataPayload, EndOfUpstream +from core.models.payload import InputDataFrame, DataPayload, EndOfUpstream, State, StateFrame from core.models.tuple import InputExhausted from proto.edu.uci.ics.amber.engine.common import ( ActorVirtualIdentity, @@ -96,7 +96,10 @@ def process_data_payload( self._current_channel_id = current_channel_id yield SenderChangeMarker(current_channel_id) - if isinstance(payload, InputDataFrame): + if isinstance(payload, StateFrame): + yield State(payload.frame.to_pandas().iloc[0].to_dict()) + + elif isinstance(payload, InputDataFrame): for field_accessor in ArrowTableTupleProvider(payload.frame): yield Tuple( {name: field_accessor for name in payload.frame.column_names}, diff --git a/core/amber/src/main/python/core/models/payload.py b/core/amber/src/main/python/core/models/payload.py index 58450e72413..8412b810a95 100644 --- a/core/amber/src/main/python/core/models/payload.py +++ b/core/amber/src/main/python/core/models/payload.py @@ -3,6 +3,7 @@ from typing import List, Optional from core.models.schema.schema import Schema +from core.models.state import State from core.models.tuple import Tuple @@ -25,3 +26,8 @@ class OutputDataFrame(DataPayload): @dataclass class EndOfUpstream(DataPayload): frame: Optional[Table] = None + + +@dataclass +class StateFrame(DataPayload): + frame: Table diff --git a/core/amber/src/main/python/core/models/state.py b/core/amber/src/main/python/core/models/state.py new file mode 100644 index 00000000000..1e2d263c91b --- /dev/null +++ b/core/amber/src/main/python/core/models/state.py @@ -0,0 +1,9 @@ +class State: + def __init__(self, data): + self.data = data + + def __setitem__(self, key, value): + self.data[key] = value + + def __getitem__(self, key): + return self.data[key] \ No newline at end of file diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala index f4d6c7aa3d4..302948971ef 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala @@ -102,15 +102,12 @@ class PythonProxyClient(portNumberPromise: Promise[Int], val actorId: ActorVirtu def sendData(dataPayload: DataPayload, from: ActorVirtualIdentity): Unit = { dataPayload match { - case DataFrame(frame) => - val queue = mutable.Queue(frame: _*) - writeArrowStream(queue, from, "InputDataFrame") + case DataFrame(frame) => writeArrowStream(mutable.Queue(frame: _*), from, "InputDataFrame") case MarkerFrame(marker) => - val queue = marker match { - case state: State => mutable.Queue(state.toTuple) - case _ => mutable.Queue.empty[Tuple] + marker match { + case state: State => writeArrowStream(mutable.Queue(state.toTuple), from, "StateFrame") + case _ => writeArrowStream(mutable.Queue.empty, from, marker.getClass.getSimpleName) } - writeArrowStream(queue, from, marker.getClass.getSimpleName) } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index 0461c16f482..978b2be5ba6 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -16,9 +16,9 @@ final case class State() extends Marker { list.put(attributeName, (attributeType, field)) } - def get(key: String): Any = { - list(key)._2 - } + def get(key: String): Any = list(key)._2 + + def apply(key: String): Any = get(key) def toTuple: Tuple = Tuple.builder( diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala index c57bd83d3c9..132020fa0cb 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala @@ -47,7 +47,7 @@ class HashJoinProbeOpExec[K]( var buildTableHashMap: mutable.HashMap[K, (ListBuffer[Tuple], Boolean)] = _ override def processState(state: State, port: Int): Unit = { - buildTableHashMap = state.get("hashtable").asInstanceOf[mutable.HashMap[K, (mutable.ListBuffer[Tuple], Boolean)]] + buildTableHashMap = state("hashtable").asInstanceOf[mutable.HashMap[K, (mutable.ListBuffer[Tuple], Boolean)]] } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpDesc.scala index 902124524c7..87bda654993 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpDesc.scala @@ -1,24 +1,14 @@ package edu.uci.ics.texera.workflow.operators.test -import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription} -import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort} import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} -import edu.uci.ics.texera.workflow.common.operators.{LogicalOp, StateTransferFunc} +import edu.uci.ics.texera.workflow.common.operators.LogicalOp import edu.uci.ics.texera.workflow.common.tuple.schema.Schema -import scala.util.{Success, Try} - class TestAOpDesc extends LogicalOp { - - @JsonProperty(required = true) - @JsonSchemaTitle("Limit") - @JsonPropertyDescription("the max number of output rows") - var limit: Int = _ - override def getPhysicalOp( workflowId: WorkflowIdentity, executionId: ExecutionIdentity @@ -29,7 +19,7 @@ class TestAOpDesc extends LogicalOp { executionId, operatorIdentifier, OpExecInitInfo((_, _) => { - new TestAOpExec(limit) + new TestAOpExec() }) ) .withInputPorts(operatorInfo.inputPorts) @@ -48,19 +38,4 @@ class TestAOpDesc extends LogicalOp { ) override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(0) - - override def runtimeReconfiguration( - workflowId: WorkflowIdentity, - executionId: ExecutionIdentity, - oldLogicalOp: LogicalOp, - newLogicalOp: LogicalOp - ): Try[(PhysicalOp, Option[StateTransferFunc])] = { - val newPhysicalOp = newLogicalOp.getPhysicalOp(workflowId, executionId) - val stateTransferFunc: StateTransferFunc = (oldOp, newOp) => { - val oldLimitOp = oldOp.asInstanceOf[TestAOpExec] - val newLimitOp = newOp.asInstanceOf[TestAOpExec] - newLimitOp.count = oldLimitOp.count - } - Success(newPhysicalOp, Some(stateTransferFunc)) - } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala index 46b671200fb..0e54312f1cb 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala @@ -6,16 +6,10 @@ import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType -class TestAOpExec(limit: Int) extends OperatorExecutor { - var count = 0 +class TestAOpExec extends OperatorExecutor { override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { - if (count < limit) { - count += 1 Iterator(tuple) - } else { - Iterator() - } } override def produceState(): State = { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala index 625e5b757a5..e2e65c81538 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala @@ -22,7 +22,7 @@ class TestBOpExec(limit: Int) extends OperatorExecutor { override def processState(state: State, port: Int): Unit = { - val objRepr = state.get("count") + val objRepr = state("count") println(objRepr) } } From 9749cdb183952e48df01345250b254874b5c96e4 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sun, 4 Aug 2024 15:09:38 -0700 Subject: [PATCH 030/163] update --- .../architecture/managers/tuple_processing_manager.py | 11 +++++++++++ core/amber/src/main/python/core/models/state.py | 5 ++++- .../src/main/python/core/runnables/data_processor.py | 6 ++++++ .../amber/src/main/python/core/runnables/main_loop.py | 9 +++++++++ .../sendsemantics/partitioners/Partitioner.scala | 6 ------ 5 files changed, 30 insertions(+), 7 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py index e321037eb93..c391daa40b3 100644 --- a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py +++ b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py @@ -2,6 +2,7 @@ from typing import Optional, Union, Tuple, Iterator from core.models import InputExhausted +from core.models.state import State from proto.edu.uci.ics.amber.engine.common import PortIdentity @@ -12,6 +13,8 @@ def __init__(self): self.current_input_tuple_iter: Optional[ Iterator[Union[Tuple, InputExhausted]] ] = None + self.current_input_state: Optional[State] = None + self.current_output_state: Optional[State] = None self.current_output_tuple: Optional[Tuple] = None self.context_switch_condition: Condition = Condition() self.finished_current: Event = Event() @@ -19,3 +22,11 @@ def __init__(self): def get_output_tuple(self) -> Optional[Tuple]: ret, self.current_output_tuple = self.current_output_tuple, None return ret + + def get_output_state(self) -> Optional[State]: + ret, self.current_output_state = self.current_output_state, None + return ret + + def get_input_state(self) -> Optional[State]: + ret, self.current_input_state = self.current_input_state, None + return ret \ No newline at end of file diff --git a/core/amber/src/main/python/core/models/state.py b/core/amber/src/main/python/core/models/state.py index 1e2d263c91b..079584d9a10 100644 --- a/core/amber/src/main/python/core/models/state.py +++ b/core/amber/src/main/python/core/models/state.py @@ -6,4 +6,7 @@ def __setitem__(self, key, value): self.data[key] = value def __getitem__(self, key): - return self.data[key] \ No newline at end of file + return self.data[key] + + def __repr__(self): + return str(self.data) \ No newline at end of file diff --git a/core/amber/src/main/python/core/runnables/data_processor.py b/core/amber/src/main/python/core/runnables/data_processor.py index ab761ec01dc..bc93ce7cf56 100644 --- a/core/amber/src/main/python/core/runnables/data_processor.py +++ b/core/amber/src/main/python/core/runnables/data_processor.py @@ -29,9 +29,15 @@ def run(self) -> None: self._running.set() self._switch_context() while self._running.is_set(): + self.process_state() self.process_tuple() self._switch_context() + def process_state(self) -> None: + executor = self._context.executor_manager.executor + executor.state = self._context.tuple_processing_manager.get_input_state() + self._switch_context() + def process_tuple(self) -> None: finished_current = self._context.tuple_processing_manager.finished_current while not finished_current.is_set(): diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index e0cba249d67..536787beea6 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -20,6 +20,7 @@ Tuple, ) from core.models.internal_queue import DataElement, ControlElement +from core.models.state import State from core.runnables.data_processor import DataProcessor from core.util import StoppableQueueBlockingRunnable, get_one_of, set_one_of from core.util.customized_queue.queue_base import QueueElement @@ -202,6 +203,12 @@ def _process_tuple(self, tuple_: Union[Tuple, InputExhausted]) -> None: self.process_input_tuple() self._check_and_process_control() + def _process_state(self, state: State): + self.context.tuple_processing_manager.current_input_state = state + self._check_and_process_control() + self._switch_context() + return self.context.tuple_processing_manager.get_output_tuple() + def _process_input_exhausted(self, input_exhausted: InputExhausted): self._process_tuple(input_exhausted) if self.context.tuple_processing_manager.current_input_port_id is not None: @@ -290,6 +297,8 @@ def _process_data_element(self, data_element: DataElement) -> None: self._process_sender_change_marker, EndOfAllMarker, self._process_end_of_all_marker, + State, + self._process_state, ) except Exception as err: logger.exception(err) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala index a263f79c9fa..c9b0cadc50b 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala @@ -30,13 +30,7 @@ class NetworkOutputBuffer( } } - def noMore(): Unit = { - flush() - dataOutputPort.sendTo(to, MarkerFrame(EndOfUpstream())) - } - def sendMarker(marker: Marker): Unit = { - flush() dataOutputPort.sendTo(to, MarkerFrame(marker)) } From 82367aa7aa9863a349ee14a606333d17ed6aabfa Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sun, 4 Aug 2024 15:12:11 -0700 Subject: [PATCH 031/163] update --- .../architecture/messaginglayer/OutputManager.scala | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManager.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManager.scala index 73320bd1536..0b83b29b393 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManager.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManager.scala @@ -1,6 +1,10 @@ package edu.uci.ics.amber.engine.architecture.messaginglayer -import edu.uci.ics.amber.engine.architecture.messaginglayer.OutputManager.{DPOutputIterator, getBatchSize, toPartitioner} +import edu.uci.ics.amber.engine.architecture.messaginglayer.OutputManager.{ + DPOutputIterator, + getBatchSize, + toPartitioner +} import edu.uci.ics.amber.engine.architecture.sendsemantics.partitioners._ import edu.uci.ics.amber.engine.architecture.sendsemantics.partitionings._ import edu.uci.ics.amber.engine.architecture.worker.DataProcessor.{FinalizeExecutor, FinalizePort} @@ -160,10 +164,7 @@ class OutputManager( } def emitMarker(marker: Marker): Unit = { - networkOutputBuffers.foreach(kv => { - kv._2.flush() - kv._2.sendMarker(marker) - }) + networkOutputBuffers.foreach(kv => kv._2.sendMarker(marker)) } def addPort(portId: PortIdentity, schema: Schema): Unit = { From 8402750da0e66029a747851577c0e5a2428273e7 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sun, 4 Aug 2024 15:37:21 -0700 Subject: [PATCH 032/163] update --- .../workflow/operators/test/TestBOpDesc.scala | 28 ++----------------- .../workflow/operators/test/TestBOpExec.scala | 17 +++-------- 2 files changed, 6 insertions(+), 39 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpDesc.scala index a61abaec7a4..68985650045 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpDesc.scala @@ -1,24 +1,15 @@ package edu.uci.ics.texera.workflow.operators.test -import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription} -import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort} import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} -import edu.uci.ics.texera.workflow.common.operators.{LogicalOp, StateTransferFunc} +import edu.uci.ics.texera.workflow.common.operators.LogicalOp import edu.uci.ics.texera.workflow.common.tuple.schema.Schema -import scala.util.{Success, Try} - class TestBOpDesc extends LogicalOp { - @JsonProperty(required = true) - @JsonSchemaTitle("Limit") - @JsonPropertyDescription("the max number of output rows") - var limit: Int = _ - override def getPhysicalOp( workflowId: WorkflowIdentity, executionId: ExecutionIdentity @@ -29,7 +20,7 @@ class TestBOpDesc extends LogicalOp { executionId, operatorIdentifier, OpExecInitInfo((_, _) => { - new TestBOpExec(limit) + new TestBOpExec() }) ) .withInputPorts(operatorInfo.inputPorts) @@ -48,19 +39,4 @@ class TestBOpDesc extends LogicalOp { ) override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(0) - - override def runtimeReconfiguration( - workflowId: WorkflowIdentity, - executionId: ExecutionIdentity, - oldLogicalOp: LogicalOp, - newLogicalOp: LogicalOp - ): Try[(PhysicalOp, Option[StateTransferFunc])] = { - val newPhysicalOp = newLogicalOp.getPhysicalOp(workflowId, executionId) - val stateTransferFunc: StateTransferFunc = (oldOp, newOp) => { - val oldLimitOp = oldOp.asInstanceOf[TestBOpExec] - val newLimitOp = newOp.asInstanceOf[TestBOpExec] - newLimitOp.count = oldLimitOp.count - } - Success(newPhysicalOp, Some(stateTransferFunc)) - } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala index e2e65c81538..360168508a8 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala @@ -5,24 +5,15 @@ import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple -class TestBOpExec(limit: Int) extends OperatorExecutor { - var count = 0 - - var s = "" +class TestBOpExec extends OperatorExecutor { override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { - if (count < limit) { - count += 1 - Iterator(tuple) - } else { - Iterator() - } + Iterator(tuple) } - - - override def processState(state: State, port: Int): Unit = { + override def processState(state: State, port: Int): State = { val objRepr = state("count") println(objRepr) + State() } } From 7addf6e2a0dc154d0956ec6bb692add56b027dd3 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sun, 4 Aug 2024 17:42:15 -0700 Subject: [PATCH 033/163] update --- .../sendsemantics/partitioners/Partitioner.scala | 2 ++ .../engine/architecture/worker/DataProcessor.scala | 6 ++++-- .../worker/promisehandlers/StartHandler.scala | 6 +++++- .../workflow/common/operators/OperatorExecutor.scala | 12 +++++------- .../operators/hashJoin/HashJoinProbeOpExec.scala | 3 ++- .../texera/workflow/operators/test/TestAOpExec.scala | 11 ++++++++++- 6 files changed, 28 insertions(+), 12 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala index c9b0cadc50b..af7ff3b7f6e 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/sendsemantics/partitioners/Partitioner.scala @@ -31,7 +31,9 @@ class NetworkOutputBuffer( } def sendMarker(marker: Marker): Unit = { + flush() dataOutputPort.sendTo(to, MarkerFrame(marker)) + flush() } def flush(): Unit = { diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index b8e26b8c8a7..8289348e9f0 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -93,7 +93,7 @@ class DataProcessor( } private[this] def processInputState(state: State, port: Int): Unit = { - executor.processState(state, port) + outputManager.emitMarker(executor.processState(state, port)) } /** @@ -206,7 +206,9 @@ class DataProcessor( if (inputManager.isPortCompleted(portId)) { inputManager.initBatch(channelId, Array.empty) processEndOfUpstream(portId.id) - outputManager.outputIterator.appendSpecialTupleToEnd(FinalizePort(portId, input = true)) + outputManager.outputIterator.appendSpecialTupleToEnd( + FinalizePort(portId, input = true) + ) } if (inputManager.getAllPorts.forall(portId => inputManager.isPortCompleted(portId))) { // assuming all the output ports finalize after all input ports are finalized. diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala index 9a5298040ee..b4cd89ec2b0 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala @@ -11,7 +11,7 @@ import edu.uci.ics.amber.engine.common.rpc.AsyncRPCServer.ControlCommand import edu.uci.ics.amber.engine.common.virtualidentity.ChannelIdentity import edu.uci.ics.amber.engine.common.virtualidentity.util.SOURCE_STARTER_ACTOR import edu.uci.ics.amber.engine.common.workflow.PortIdentity -import edu.uci.ics.texera.workflow.common.EndOfUpstream +import edu.uci.ics.texera.workflow.common.{EndOfUpstream, State} object StartHandler { final case class StartWorker() extends ControlCommand[WorkerState] @@ -35,6 +35,10 @@ trait StartHandler { ChannelIdentity(SOURCE_STARTER_ACTOR, dp.actorId, isControl = false), MarkerFrame(EndOfUpstream()) ) + dp.processDataPayload( + ChannelIdentity(SOURCE_STARTER_ACTOR, dp.actorId, isControl = false), + MarkerFrame(State()) + ) dp.stateManager.getCurrentState } else { throw new WorkflowRuntimeException( diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala index 769eed2dc15..c571f8e2f9a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala @@ -9,7 +9,7 @@ trait OperatorExecutor { def open(): Unit = {} - def close(): Unit = {} + def processState(state: State, port: Int): State = state def processTupleMultiPort( tuple: Tuple, @@ -19,16 +19,14 @@ trait OperatorExecutor { } def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] - + + def produceState(): State = null + def onInputFinishMultiPort(port: Int): Iterator[(TupleLike, Option[PortIdentity])] = { onFinish(port).map(t => (t, None)) } def onFinish(port: Int): Iterator[TupleLike] = Iterator.empty - def onInputStart(port: Int): Iterator[State] = Iterator.empty - - def produceState(): State = null - - def processState(state: State, port: Int): Unit = {} + def close(): Unit = {} } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala index 132020fa0cb..27ea877dedb 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala @@ -46,8 +46,9 @@ class HashJoinProbeOpExec[K]( var buildTableHashMap: mutable.HashMap[K, (ListBuffer[Tuple], Boolean)] = _ - override def processState(state: State, port: Int): Unit = { + override def processState(state: State, port: Int): State = { buildTableHashMap = state("hashtable").asInstanceOf[mutable.HashMap[K, (mutable.ListBuffer[Tuple], Boolean)]] + State() } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala index 0e54312f1cb..541e567362e 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala @@ -9,7 +9,13 @@ import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType class TestAOpExec extends OperatorExecutor { override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { - Iterator(tuple) + Iterator(tuple) + } + + override def processState(state: State, port: Int): State = { + val state = State() + state.add("count", AttributeType.STRING, "test") + state } override def produceState(): State = { @@ -17,4 +23,7 @@ class TestAOpExec extends OperatorExecutor { state.add("count", AttributeType.STRING, "test") state } + + //openState + //closeState } From 81b72c445e897e253499768eb68f43c48adafb33 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sun, 4 Aug 2024 22:25:29 -0700 Subject: [PATCH 034/163] update --- .../ics/texera/workflow/common/Marker.scala | 19 ++++++++++++------ .../workflow/operators/test/TestAOpExec.scala | 20 +++++++++++-------- .../workflow/operators/test/TestBOpExec.scala | 5 ++--- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index 978b2be5ba6..c97fe6212f1 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -2,7 +2,6 @@ package edu.uci.ics.texera.workflow.common import edu.uci.ics.texera.workflow.common.tuple.Tuple import edu.uci.ics.texera.workflow.common.tuple.schema.{AttributeType, Schema, Attribute} - import scala.collection.mutable sealed trait Marker @@ -21,11 +20,19 @@ final case class State() extends Marker { def apply(key: String): Any = get(key) def toTuple: Tuple = - Tuple.builder( - Schema.builder() - .add(list.map { case (name, (attrType, _)) => - new Attribute(name, attrType)}) - .build()) + Tuple + .builder( + Schema + .builder() + .add(list.map { + case (name, (attrType, _)) => + new Attribute(name, attrType) + }) + .build() + ) .addSequentially(list.values.map(_._2).toArray) .build() + + override def toString: String = + list.map { case (key, (_, value)) => s"$key: $value" }.mkString(", ") } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala index 541e567362e..07c74fc7893 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala @@ -5,25 +5,29 @@ import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType +import scala.collection.mutable class TestAOpExec extends OperatorExecutor { - - override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { - Iterator(tuple) - } + var buffer = new mutable.ArrayBuffer[Tuple]() override def processState(state: State, port: Int): State = { val state = State() - state.add("count", AttributeType.STRING, "test") + state.add("state1", AttributeType.STRING, "before process tuple") state } + override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { + buffer += tuple + Iterator(tuple) + } + override def produceState(): State = { val state = State() - state.add("count", AttributeType.STRING, "test") + state.add("state2", AttributeType.STRING, "after process tuple") state } - //openState - //closeState + override def onFinish(port: Int): Iterator[TupleLike] = { + buffer.iterator + } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala index 360168508a8..d804b251932 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala @@ -12,8 +12,7 @@ class TestBOpExec extends OperatorExecutor { } override def processState(state: State, port: Int): State = { - val objRepr = state("count") - println(objRepr) - State() + println(state) + state } } From 6c403b8eede20e95f3311a28e1687d36dd0aafe6 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 5 Aug 2024 14:43:59 -0700 Subject: [PATCH 035/163] update --- .../workflow/common/operators/LogicalOp.scala | 16 ++++++-- .../{TestAOpDesc.scala => TestA1OpDesc.scala} | 4 +- .../{TestAOpExec.scala => TestA1OpExec.scala} | 6 +-- .../operators/test/TestA2OpDesc.scala | 41 +++++++++++++++++++ .../operators/test/TestA2OpExec.scala | 34 +++++++++++++++ 5 files changed, 92 insertions(+), 9 deletions(-) rename core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/{TestAOpDesc.scala => TestA1OpDesc.scala} (95%) rename core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/{TestAOpExec.scala => TestA1OpExec.scala} (80%) create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA2OpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA2OpExec.scala diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala index 451c77b675c..241c618dfe2 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala @@ -10,7 +10,11 @@ import com.fasterxml.jackson.annotation.{ } import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp -import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, OperatorIdentity, WorkflowIdentity} +import edu.uci.ics.amber.engine.common.virtualidentity.{ + ExecutionIdentity, + OperatorIdentity, + WorkflowIdentity +} import edu.uci.ics.amber.engine.common.workflow.PortIdentity import edu.uci.ics.texera.web.OPversion import edu.uci.ics.texera.workflow.common.metadata.{OperatorInfo, PropertyNameConstants} @@ -80,7 +84,10 @@ import edu.uci.ics.texera.workflow.operators.sklearn.{ import edu.uci.ics.texera.workflow.operators.sort.SortOpDesc import edu.uci.ics.texera.workflow.operators.sortPartitions.SortPartitionsOpDesc import edu.uci.ics.texera.workflow.operators.source.apis.reddit.RedditSearchSourceOpDesc -import edu.uci.ics.texera.workflow.operators.source.apis.twitter.v2.{TwitterFullArchiveSearchSourceOpDesc, TwitterSearchSourceOpDesc} +import edu.uci.ics.texera.workflow.operators.source.apis.twitter.v2.{ + TwitterFullArchiveSearchSourceOpDesc, + TwitterSearchSourceOpDesc +} import edu.uci.ics.texera.workflow.operators.source.fetcher.URLFetcherOpDesc import edu.uci.ics.texera.workflow.operators.source.scan.FileScanSourceOpDesc import edu.uci.ics.texera.workflow.operators.source.scan.csv.CSVScanSourceOpDesc @@ -92,7 +99,7 @@ import edu.uci.ics.texera.workflow.operators.source.sql.mysql.MySQLSourceOpDesc import edu.uci.ics.texera.workflow.operators.source.sql.postgresql.PostgreSQLSourceOpDesc import edu.uci.ics.texera.workflow.operators.split.SplitOpDesc import edu.uci.ics.texera.workflow.operators.symmetricDifference.SymmetricDifferenceOpDesc -import edu.uci.ics.texera.workflow.operators.test.{TestAOpDesc, TestBOpDesc} +import edu.uci.ics.texera.workflow.operators.test.{TestA1OpDesc, TestBOpDesc} import edu.uci.ics.texera.workflow.operators.typecasting.TypeCastingOpDesc import edu.uci.ics.texera.workflow.operators.udf.java.JavaUDFOpDesc import edu.uci.ics.texera.workflow.operators.udf.python.source.PythonUDFSourceOpDescV2 @@ -196,7 +203,8 @@ trait StateTransferFunc new Type(value = classOf[AsterixDBSourceOpDesc], name = "AsterixDBSource"), new Type(value = classOf[TypeCastingOpDesc], name = "TypeCasting"), new Type(value = classOf[LimitOpDesc], name = "Limit"), - new Type(value = classOf[TestAOpDesc], name = "TestA"), + new Type(value = classOf[TestA1OpDesc], name = "TestA1"), + new Type(value = classOf[TestA1OpDesc], name = "TestA2"), new Type(value = classOf[TestBOpDesc], name = "TestB"), new Type(value = classOf[RandomKSamplingOpDesc], name = "RandomKSampling"), new Type(value = classOf[ReservoirSamplingOpDesc], name = "ReservoirSampling"), diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA1OpDesc.scala similarity index 95% rename from core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpDesc.scala rename to core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA1OpDesc.scala index 87bda654993..c7e4b26ece8 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA1OpDesc.scala @@ -8,7 +8,7 @@ import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, Oper import edu.uci.ics.texera.workflow.common.operators.LogicalOp import edu.uci.ics.texera.workflow.common.tuple.schema.Schema -class TestAOpDesc extends LogicalOp { +class TestA1OpDesc extends LogicalOp { override def getPhysicalOp( workflowId: WorkflowIdentity, executionId: ExecutionIdentity @@ -19,7 +19,7 @@ class TestAOpDesc extends LogicalOp { executionId, operatorIdentifier, OpExecInitInfo((_, _) => { - new TestAOpExec() + new TestA1OpExec() }) ) .withInputPorts(operatorInfo.inputPorts) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA1OpExec.scala similarity index 80% rename from core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala rename to core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA1OpExec.scala index 07c74fc7893..b49b4133b87 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestAOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA1OpExec.scala @@ -7,12 +7,12 @@ import edu.uci.ics.texera.workflow.common.tuple.Tuple import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType import scala.collection.mutable -class TestAOpExec extends OperatorExecutor { +class TestA1OpExec extends OperatorExecutor { var buffer = new mutable.ArrayBuffer[Tuple]() override def processState(state: State, port: Int): State = { val state = State() - state.add("state1", AttributeType.STRING, "before process tuple") + state.add("state1 from A1", AttributeType.STRING, "before process tuple") state } @@ -23,7 +23,7 @@ class TestAOpExec extends OperatorExecutor { override def produceState(): State = { val state = State() - state.add("state2", AttributeType.STRING, "after process tuple") + state.add("state2 from A1", AttributeType.STRING, "after process tuple") state } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA2OpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA2OpDesc.scala new file mode 100644 index 00000000000..565978aab41 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA2OpDesc.scala @@ -0,0 +1,41 @@ +package edu.uci.ics.texera.workflow.operators.test + +import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp +import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo +import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} +import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort} +import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} +import edu.uci.ics.texera.workflow.common.operators.LogicalOp +import edu.uci.ics.texera.workflow.common.tuple.schema.Schema + +class TestA2OpDesc extends LogicalOp { + override def getPhysicalOp( + workflowId: WorkflowIdentity, + executionId: ExecutionIdentity + ): PhysicalOp = { + PhysicalOp + .oneToOnePhysicalOp( + workflowId, + executionId, + operatorIdentifier, + OpExecInitInfo((_, _) => { + new TestA2OpExec() + }) + ) + .withInputPorts(operatorInfo.inputPorts) + .withOutputPorts(operatorInfo.outputPorts) + .withParallelizable(false) + } + + override def operatorInfo: OperatorInfo = + OperatorInfo( + "TestA", + "Limit the number of output rows", + OperatorGroupConstants.CLEANING_GROUP, + inputPorts = List(InputPort()), + outputPorts = List(OutputPort()), + supportReconfiguration = true + ) + + override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(0) +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA2OpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA2OpExec.scala new file mode 100644 index 00000000000..7a0a562d506 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA2OpExec.scala @@ -0,0 +1,34 @@ +package edu.uci.ics.texera.workflow.operators.test + +import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike +import edu.uci.ics.texera.workflow.common.State +import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor +import edu.uci.ics.texera.workflow.common.tuple.Tuple +import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType + +import scala.collection.mutable + +class TestA2OpExec extends OperatorExecutor { + var buffer = new mutable.ArrayBuffer[Tuple]() + + override def processState(state: State, port: Int): State = { + val state = State() + state.add("state1 from A2", AttributeType.STRING, "before process tuple") + state + } + + override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { + buffer += tuple + Iterator(tuple) + } + + override def produceState(): State = { + val state = State() + state.add("state2 from A2", AttributeType.STRING, "after process tuple") + state + } + + override def onFinish(port: Int): Iterator[TupleLike] = { + buffer.iterator + } +} From 5931bb08d29c3b4d4022e7b3fed9d373db29780e Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 5 Aug 2024 17:11:19 -0700 Subject: [PATCH 036/163] update --- .../workflow/common/operators/LogicalOp.scala | 81 +++---------------- .../state/DualInputStateReceiverOpDesc.scala | 42 ++++++++++ .../state/DualInputStateReceiverOpExec.scala | 26 ++++++ .../StateReceiverOpDesc.scala} | 12 +-- .../StateReceiverOpExec.scala} | 4 +- .../StateSender1OpDesc.scala} | 12 +-- .../StateSender1OpExec.scala} | 4 +- .../StateSender2OpDesc.scala} | 12 +-- .../StateSender2OpExec.scala} | 4 +- 9 files changed, 105 insertions(+), 92 deletions(-) create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpExec.scala rename core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/{test/TestBOpDesc.scala => state/StateReceiverOpDesc.scala} (84%) rename core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/{test/TestBOpExec.scala => state/StateReceiverOpExec.scala} (80%) rename core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/{test/TestA1OpDesc.scala => state/StateSender1OpDesc.scala} (84%) rename core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/{test/TestA1OpExec.scala => state/StateSender1OpExec.scala} (89%) rename core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/{test/TestA2OpDesc.scala => state/StateSender2OpDesc.scala} (84%) rename core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/{test/TestA2OpExec.scala => state/StateSender2OpExec.scala} (89%) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala index 241c618dfe2..314467c113e 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala @@ -1,20 +1,10 @@ package edu.uci.ics.texera.workflow.common.operators import com.fasterxml.jackson.annotation.JsonSubTypes.Type -import com.fasterxml.jackson.annotation.{ - JsonIgnore, - JsonProperty, - JsonPropertyDescription, - JsonSubTypes, - JsonTypeInfo -} +import com.fasterxml.jackson.annotation.{JsonIgnore, JsonProperty, JsonPropertyDescription, JsonSubTypes, JsonTypeInfo} import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp -import edu.uci.ics.amber.engine.common.virtualidentity.{ - ExecutionIdentity, - OperatorIdentity, - WorkflowIdentity -} +import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, OperatorIdentity, WorkflowIdentity} import edu.uci.ics.amber.engine.common.workflow.PortIdentity import edu.uci.ics.texera.web.OPversion import edu.uci.ics.texera.workflow.common.metadata.{OperatorInfo, PropertyNameConstants} @@ -34,16 +24,8 @@ import edu.uci.ics.texera.workflow.operators.intersect.IntersectOpDesc import edu.uci.ics.texera.workflow.operators.intervalJoin.IntervalJoinOpDesc import edu.uci.ics.texera.workflow.operators.keywordSearch.KeywordSearchOpDesc import edu.uci.ics.texera.workflow.operators.limit.LimitOpDesc -import edu.uci.ics.texera.workflow.operators.huggingFace.{ - HuggingFaceIrisLogisticRegressionOpDesc, - HuggingFaceSentimentAnalysisOpDesc, - HuggingFaceSpamSMSDetectionOpDesc, - HuggingFaceTextSummarizationOpDesc -} -import edu.uci.ics.texera.workflow.operators.machineLearning.sklearnAdvanced.KNNTrainer.{ - SklearnAdvancedKNNClassifierTrainerOpDesc, - SklearnAdvancedKNNRegressorTrainerOpDesc -} +import edu.uci.ics.texera.workflow.operators.huggingFace.{HuggingFaceIrisLogisticRegressionOpDesc, HuggingFaceSentimentAnalysisOpDesc, HuggingFaceSpamSMSDetectionOpDesc, HuggingFaceTextSummarizationOpDesc} +import edu.uci.ics.texera.workflow.operators.machineLearning.sklearnAdvanced.KNNTrainer.{SklearnAdvancedKNNClassifierTrainerOpDesc, SklearnAdvancedKNNRegressorTrainerOpDesc} import edu.uci.ics.texera.workflow.operators.machineLearning.sklearnAdvanced.SVCTrainer.SklearnAdvancedSVCTrainerOpDesc import edu.uci.ics.texera.workflow.operators.machineLearning.sklearnAdvanced.SVRTrainer.SVCTrainer.SklearnAdvancedSVRTrainerOpDesc import edu.uci.ics.texera.workflow.operators.projection.ProjectionOpDesc @@ -52,42 +34,11 @@ import edu.uci.ics.texera.workflow.operators.regex.RegexOpDesc import edu.uci.ics.texera.workflow.operators.reservoirsampling.ReservoirSamplingOpDesc import edu.uci.ics.texera.workflow.operators.sentiment.SentimentAnalysisOpDesc import edu.uci.ics.texera.workflow.operators.sink.managed.ProgressiveSinkOpDesc -import edu.uci.ics.texera.workflow.operators.sklearn.{ - SklearnAdaptiveBoostingOpDesc, - SklearnBaggingOpDesc, - SklearnBernoulliNaiveBayesOpDesc, - SklearnComplementNaiveBayesOpDesc, - SklearnDecisionTreeOpDesc, - SklearnDummyClassifierOpDesc, - SklearnExtraTreeOpDesc, - SklearnExtraTreesOpDesc, - SklearnGaussianNaiveBayesOpDesc, - SklearnGradientBoostingOpDesc, - SklearnKNNOpDesc, - SklearnLinearRegressionOpDesc, - SklearnLinearSVMOpDesc, - SklearnLogisticRegressionCVOpDesc, - SklearnLogisticRegressionOpDesc, - SklearnMultiLayerPerceptronOpDesc, - SklearnMultinomialNaiveBayesOpDesc, - SklearnNearestCentroidOpDesc, - SklearnPassiveAggressiveOpDesc, - SklearnPerceptronOpDesc, - SklearnPredictionOpDesc, - SklearnProbabilityCalibrationOpDesc, - SklearnRandomForestOpDesc, - SklearnRidgeCVOpDesc, - SklearnRidgeOpDesc, - SklearnSDGOpDesc, - SklearnSVMOpDesc -} +import edu.uci.ics.texera.workflow.operators.sklearn.{SklearnAdaptiveBoostingOpDesc, SklearnBaggingOpDesc, SklearnBernoulliNaiveBayesOpDesc, SklearnComplementNaiveBayesOpDesc, SklearnDecisionTreeOpDesc, SklearnDummyClassifierOpDesc, SklearnExtraTreeOpDesc, SklearnExtraTreesOpDesc, SklearnGaussianNaiveBayesOpDesc, SklearnGradientBoostingOpDesc, SklearnKNNOpDesc, SklearnLinearRegressionOpDesc, SklearnLinearSVMOpDesc, SklearnLogisticRegressionCVOpDesc, SklearnLogisticRegressionOpDesc, SklearnMultiLayerPerceptronOpDesc, SklearnMultinomialNaiveBayesOpDesc, SklearnNearestCentroidOpDesc, SklearnPassiveAggressiveOpDesc, SklearnPerceptronOpDesc, SklearnPredictionOpDesc, SklearnProbabilityCalibrationOpDesc, SklearnRandomForestOpDesc, SklearnRidgeCVOpDesc, SklearnRidgeOpDesc, SklearnSDGOpDesc, SklearnSVMOpDesc} import edu.uci.ics.texera.workflow.operators.sort.SortOpDesc import edu.uci.ics.texera.workflow.operators.sortPartitions.SortPartitionsOpDesc import edu.uci.ics.texera.workflow.operators.source.apis.reddit.RedditSearchSourceOpDesc -import edu.uci.ics.texera.workflow.operators.source.apis.twitter.v2.{ - TwitterFullArchiveSearchSourceOpDesc, - TwitterSearchSourceOpDesc -} +import edu.uci.ics.texera.workflow.operators.source.apis.twitter.v2.{TwitterFullArchiveSearchSourceOpDesc, TwitterSearchSourceOpDesc} import edu.uci.ics.texera.workflow.operators.source.fetcher.URLFetcherOpDesc import edu.uci.ics.texera.workflow.operators.source.scan.FileScanSourceOpDesc import edu.uci.ics.texera.workflow.operators.source.scan.csv.CSVScanSourceOpDesc @@ -99,16 +50,11 @@ import edu.uci.ics.texera.workflow.operators.source.sql.mysql.MySQLSourceOpDesc import edu.uci.ics.texera.workflow.operators.source.sql.postgresql.PostgreSQLSourceOpDesc import edu.uci.ics.texera.workflow.operators.split.SplitOpDesc import edu.uci.ics.texera.workflow.operators.symmetricDifference.SymmetricDifferenceOpDesc -import edu.uci.ics.texera.workflow.operators.test.{TestA1OpDesc, TestBOpDesc} +import edu.uci.ics.texera.workflow.operators.state.{DualInputStateReceiverOpDesc, StateReceiverOpDesc, StateSender1OpDesc, StateSender2OpDesc} import edu.uci.ics.texera.workflow.operators.typecasting.TypeCastingOpDesc import edu.uci.ics.texera.workflow.operators.udf.java.JavaUDFOpDesc import edu.uci.ics.texera.workflow.operators.udf.python.source.PythonUDFSourceOpDescV2 -import edu.uci.ics.texera.workflow.operators.udf.python.{ - DualInputPortsPythonUDFOpDescV2, - PythonLambdaFunctionOpDesc, - PythonTableReducerOpDesc, - PythonUDFOpDescV2 -} +import edu.uci.ics.texera.workflow.operators.udf.python.{DualInputPortsPythonUDFOpDescV2, PythonLambdaFunctionOpDesc, PythonTableReducerOpDesc, PythonUDFOpDescV2} import edu.uci.ics.texera.workflow.operators.udf.r.{RUDFOpDesc, RUDFSourceOpDesc} import edu.uci.ics.texera.workflow.operators.union.UnionOpDesc import edu.uci.ics.texera.workflow.operators.unneststring.UnnestStringOpDesc @@ -134,11 +80,8 @@ import edu.uci.ics.texera.workflow.operators.visualization.scatter3DChart.Scatte import edu.uci.ics.texera.workflow.operators.visualization.ScatterMatrixChart.ScatterMatrixChartOpDesc import edu.uci.ics.texera.workflow.operators.visualization.funnelPlot.FunnelPlotOpDesc import edu.uci.ics.texera.workflow.operators.visualization.tablesChart.TablesPlotOpDesc - import edu.uci.ics.texera.workflow.operators.visualization.icicleChart.IcicleChartOpDesc - import edu.uci.ics.texera.workflow.operators.visualization.continuousErrorBands.ContinuousErrorBandsOpDesc - import edu.uci.ics.texera.workflow.operators.visualization.ternaryPlot.TernaryPlotOpDesc import org.apache.commons.lang3.builder.{EqualsBuilder, HashCodeBuilder, ToStringBuilder} import org.apache.zookeeper.KeeperException.UnimplementedException @@ -146,6 +89,7 @@ import edu.uci.ics.texera.workflow.operators.machineLearning.Scorer.MachineLearn import edu.uci.ics.texera.workflow.operators.visualization.quiverPlot.QuiverPlotOpDesc import edu.uci.ics.texera.workflow.operators.visualization.contourPlot.ContourPlotOpDesc import edu.uci.ics.texera.workflow.operators.visualization.figureFactoryTable.FigureFactoryTableOpDesc + import java.util.UUID import scala.collection.mutable import scala.util.Try @@ -203,9 +147,10 @@ trait StateTransferFunc new Type(value = classOf[AsterixDBSourceOpDesc], name = "AsterixDBSource"), new Type(value = classOf[TypeCastingOpDesc], name = "TypeCasting"), new Type(value = classOf[LimitOpDesc], name = "Limit"), - new Type(value = classOf[TestA1OpDesc], name = "TestA1"), - new Type(value = classOf[TestA1OpDesc], name = "TestA2"), - new Type(value = classOf[TestBOpDesc], name = "TestB"), + new Type(value = classOf[StateSender1OpDesc], name = "State Sender1"), + new Type(value = classOf[StateSender2OpDesc], name = "State Sender2"), + new Type(value = classOf[StateReceiverOpDesc], name = "Testing State Receiver"), + new Type(value = classOf[DualInputStateReceiverOpDesc], name = "Dual Input Testing State Receiver"), new Type(value = classOf[RandomKSamplingOpDesc], name = "RandomKSampling"), new Type(value = classOf[ReservoirSamplingOpDesc], name = "ReservoirSampling"), new Type(value = classOf[HashJoinOpDesc[String]], name = "HashJoin"), diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpDesc.scala new file mode 100644 index 00000000000..f5387461816 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpDesc.scala @@ -0,0 +1,42 @@ +package edu.uci.ics.texera.workflow.operators.state + +import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp +import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo +import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} +import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort} +import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} +import edu.uci.ics.texera.workflow.common.operators.LogicalOp +import edu.uci.ics.texera.workflow.common.tuple.schema.Schema + +class DualInputStateReceiverOpDesc extends LogicalOp { + + override def getPhysicalOp( + workflowId: WorkflowIdentity, + executionId: ExecutionIdentity + ): PhysicalOp = { + PhysicalOp + .oneToOnePhysicalOp( + workflowId, + executionId, + operatorIdentifier, + OpExecInitInfo((_, _) => { + new DualInputStateReceiverOpExec() + }) + ) + .withInputPorts(operatorInfo.inputPorts) + .withOutputPorts(operatorInfo.outputPorts) + .withParallelizable(false) + } + + override def operatorInfo: OperatorInfo = + OperatorInfo( + "2 in Testing State Receiver", + "", + OperatorGroupConstants.UTILITY_GROUP, + inputPorts = List(InputPort(), InputPort()), + outputPorts = List(OutputPort()), + supportReconfiguration = true + ) + + override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(0) +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpExec.scala new file mode 100644 index 00000000000..ea6ad205f4b --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpExec.scala @@ -0,0 +1,26 @@ +package edu.uci.ics.texera.workflow.operators.state + +import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike +import edu.uci.ics.amber.engine.common.workflow.PortIdentity +import edu.uci.ics.texera.workflow.common.State +import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor +import edu.uci.ics.texera.workflow.common.tuple.Tuple + +class DualInputStateReceiverOpExec extends OperatorExecutor { + + override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { + Iterator(tuple) + } + + override def processTupleMultiPort( + tuple: Tuple, + port: Int + ): Iterator[(TupleLike, Option[PortIdentity])] = { + processTuple(tuple, port).map(t => (t, None)) + } + + override def processState(state: State, port: Int): State = { + println(port, state) + state + } +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpDesc.scala similarity index 84% rename from core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpDesc.scala rename to core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpDesc.scala index 68985650045..346084072fc 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpDesc.scala @@ -1,4 +1,4 @@ -package edu.uci.ics.texera.workflow.operators.test +package edu.uci.ics.texera.workflow.operators.state import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo @@ -8,7 +8,7 @@ import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, Oper import edu.uci.ics.texera.workflow.common.operators.LogicalOp import edu.uci.ics.texera.workflow.common.tuple.schema.Schema -class TestBOpDesc extends LogicalOp { +class StateReceiverOpDesc extends LogicalOp { override def getPhysicalOp( workflowId: WorkflowIdentity, @@ -20,7 +20,7 @@ class TestBOpDesc extends LogicalOp { executionId, operatorIdentifier, OpExecInitInfo((_, _) => { - new TestBOpExec() + new StateReceiverOpExec() }) ) .withInputPorts(operatorInfo.inputPorts) @@ -30,9 +30,9 @@ class TestBOpDesc extends LogicalOp { override def operatorInfo: OperatorInfo = OperatorInfo( - "TestB", - "Limit the number of output rows", - OperatorGroupConstants.CLEANING_GROUP, + "Testing State Receiver", + "", + OperatorGroupConstants.UTILITY_GROUP, inputPorts = List(InputPort()), outputPorts = List(OutputPort()), supportReconfiguration = true diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpExec.scala similarity index 80% rename from core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala rename to core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpExec.scala index d804b251932..41af2291e6a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestBOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpExec.scala @@ -1,11 +1,11 @@ -package edu.uci.ics.texera.workflow.operators.test +package edu.uci.ics.texera.workflow.operators.state import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple -class TestBOpExec extends OperatorExecutor { +class StateReceiverOpExec extends OperatorExecutor { override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { Iterator(tuple) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA1OpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender1OpDesc.scala similarity index 84% rename from core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA1OpDesc.scala rename to core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender1OpDesc.scala index c7e4b26ece8..8c81ab1133d 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA1OpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender1OpDesc.scala @@ -1,4 +1,4 @@ -package edu.uci.ics.texera.workflow.operators.test +package edu.uci.ics.texera.workflow.operators.state import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo @@ -8,7 +8,7 @@ import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, Oper import edu.uci.ics.texera.workflow.common.operators.LogicalOp import edu.uci.ics.texera.workflow.common.tuple.schema.Schema -class TestA1OpDesc extends LogicalOp { +class StateSender1OpDesc extends LogicalOp { override def getPhysicalOp( workflowId: WorkflowIdentity, executionId: ExecutionIdentity @@ -19,7 +19,7 @@ class TestA1OpDesc extends LogicalOp { executionId, operatorIdentifier, OpExecInitInfo((_, _) => { - new TestA1OpExec() + new StateSender1OpExec() }) ) .withInputPorts(operatorInfo.inputPorts) @@ -29,9 +29,9 @@ class TestA1OpDesc extends LogicalOp { override def operatorInfo: OperatorInfo = OperatorInfo( - "TestA", - "Limit the number of output rows", - OperatorGroupConstants.CLEANING_GROUP, + "State Sender A", + "Append State to Data", + OperatorGroupConstants.UTILITY_GROUP, inputPorts = List(InputPort()), outputPorts = List(OutputPort()), supportReconfiguration = true diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA1OpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender1OpExec.scala similarity index 89% rename from core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA1OpExec.scala rename to core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender1OpExec.scala index b49b4133b87..da6e8458d28 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA1OpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender1OpExec.scala @@ -1,4 +1,4 @@ -package edu.uci.ics.texera.workflow.operators.test +package edu.uci.ics.texera.workflow.operators.state import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike import edu.uci.ics.texera.workflow.common.State @@ -7,7 +7,7 @@ import edu.uci.ics.texera.workflow.common.tuple.Tuple import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType import scala.collection.mutable -class TestA1OpExec extends OperatorExecutor { +class StateSender1OpExec extends OperatorExecutor { var buffer = new mutable.ArrayBuffer[Tuple]() override def processState(state: State, port: Int): State = { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA2OpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender2OpDesc.scala similarity index 84% rename from core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA2OpDesc.scala rename to core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender2OpDesc.scala index 565978aab41..160a6ab1a64 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA2OpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender2OpDesc.scala @@ -1,4 +1,4 @@ -package edu.uci.ics.texera.workflow.operators.test +package edu.uci.ics.texera.workflow.operators.state import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo @@ -8,7 +8,7 @@ import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, Oper import edu.uci.ics.texera.workflow.common.operators.LogicalOp import edu.uci.ics.texera.workflow.common.tuple.schema.Schema -class TestA2OpDesc extends LogicalOp { +class StateSender2OpDesc extends LogicalOp { override def getPhysicalOp( workflowId: WorkflowIdentity, executionId: ExecutionIdentity @@ -19,7 +19,7 @@ class TestA2OpDesc extends LogicalOp { executionId, operatorIdentifier, OpExecInitInfo((_, _) => { - new TestA2OpExec() + new StateSender2OpExec() }) ) .withInputPorts(operatorInfo.inputPorts) @@ -29,9 +29,9 @@ class TestA2OpDesc extends LogicalOp { override def operatorInfo: OperatorInfo = OperatorInfo( - "TestA", - "Limit the number of output rows", - OperatorGroupConstants.CLEANING_GROUP, + "State Sender B", + "Append State to Data", + OperatorGroupConstants.UTILITY_GROUP, inputPorts = List(InputPort()), outputPorts = List(OutputPort()), supportReconfiguration = true diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA2OpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender2OpExec.scala similarity index 89% rename from core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA2OpExec.scala rename to core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender2OpExec.scala index 7a0a562d506..df61aac37f5 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/test/TestA2OpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender2OpExec.scala @@ -1,4 +1,4 @@ -package edu.uci.ics.texera.workflow.operators.test +package edu.uci.ics.texera.workflow.operators.state import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike import edu.uci.ics.texera.workflow.common.State @@ -8,7 +8,7 @@ import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType import scala.collection.mutable -class TestA2OpExec extends OperatorExecutor { +class StateSender2OpExec extends OperatorExecutor { var buffer = new mutable.ArrayBuffer[Tuple]() override def processState(state: State, port: Int): State = { From c77bc97ae2b997e3e5b6e222ac04a511bfa2f1d9 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 5 Aug 2024 18:43:13 -0700 Subject: [PATCH 037/163] update --- .../src/main/python/core/models/__init__.py | 2 + .../src/main/python/core/models/operator.py | 11 ++++++ .../python/core/runnables/data_processor.py | 38 ++++++++++++++++--- .../amber/src/main/python/pyamber/__init__.py | 2 + .../src/main/python/pytexera/__init__.py | 1 + .../state/DualInputStateReceiverOpDesc.scala | 4 +- 6 files changed, 51 insertions(+), 7 deletions(-) diff --git a/core/amber/src/main/python/core/models/__init__.py b/core/amber/src/main/python/core/models/__init__.py index faedba768ad..25d00dd3b21 100644 --- a/core/amber/src/main/python/core/models/__init__.py +++ b/core/amber/src/main/python/core/models/__init__.py @@ -7,6 +7,7 @@ from .table import Table, TableLike from .batch import Batch, BatchLike from .schema import AttributeType, Field, Schema +from .state import State from .operator import ( Operator, TupleOperator, @@ -51,4 +52,5 @@ class ExceptionInfo(NamedTuple): "AttributeType", "Field", "Schema", + "State" ] diff --git a/core/amber/src/main/python/core/models/operator.py b/core/amber/src/main/python/core/models/operator.py index cc8819f9c84..c1f4a79151d 100644 --- a/core/amber/src/main/python/core/models/operator.py +++ b/core/amber/src/main/python/core/models/operator.py @@ -8,6 +8,7 @@ from deprecated import deprecated from . import InputExhausted, Table, TableLike, Tuple, TupleLike, Batch, BatchLike +from .state import State from .table import all_output_to_tuple @@ -47,6 +48,16 @@ def close(self) -> None: """ pass + def process_state(self, state_: State, port: int) -> State: + """ + Process an input State from the given link. + + :param state_: State, a State from an input port to be processed. + :param port: int, input port index of the current exhausted port. + :return: State, producing one State object + """ + pass + class TupleOperatorV2(Operator): """ diff --git a/core/amber/src/main/python/core/runnables/data_processor.py b/core/amber/src/main/python/core/runnables/data_processor.py index bc93ce7cf56..52e07660b6b 100644 --- a/core/amber/src/main/python/core/runnables/data_processor.py +++ b/core/amber/src/main/python/core/runnables/data_processor.py @@ -34,9 +34,37 @@ def run(self) -> None: self._switch_context() def process_state(self) -> None: - executor = self._context.executor_manager.executor - executor.state = self._context.tuple_processing_manager.get_input_state() - self._switch_context() + try: + executor = self._context.executor_manager.executor + state_ = self._context.tuple_processing_manager.get_input_state() + port_id = self._context.tuple_processing_manager.current_input_port_id + port: int + if port_id is None: + # no upstream, special case for source executor. + port = 0 + else: + port = port_id.id + + + with replace_print( + self._context.worker_id, + self._context.console_message_manager.print_buf, + ): + output_iterator = executor.process_state(state_, port) + #for output in output_iterator: + #for output_tuple in all_output_to_tuple(output): + #self._set_output_tuple(output_tuple) + #self._switch_context() + + + except Exception as err: + logger.exception(err) + exc_info = sys.exc_info() + self._context.exception_manager.set_exception_info(exc_info) + self._report_exception(exc_info) + + finally: + self._switch_context() def process_tuple(self) -> None: finished_current = self._context.tuple_processing_manager.finished_current @@ -58,8 +86,8 @@ def process_tuple(self) -> None: else executor.on_finish(port) ) with replace_print( - self._context.worker_id, - self._context.console_message_manager.print_buf, + self._context.worker_id, + self._context.console_message_manager.print_buf, ): for output in output_iterator: # output could be a None, a TupleLike, or a TableLike. diff --git a/core/amber/src/main/python/pyamber/__init__.py b/core/amber/src/main/python/pyamber/__init__.py index e9eae3f2e2f..b57b2924313 100644 --- a/core/amber/src/main/python/pyamber/__init__.py +++ b/core/amber/src/main/python/pyamber/__init__.py @@ -11,6 +11,7 @@ BatchOperator, SourceOperator, TupleOperatorV2, + State ) __all__ = [ @@ -26,4 +27,5 @@ "BatchOperator", "TupleOperatorV2", "SourceOperator", + "State" ] diff --git a/core/amber/src/main/python/pytexera/__init__.py b/core/amber/src/main/python/pytexera/__init__.py index 2277b744a58..99099c21312 100644 --- a/core/amber/src/main/python/pytexera/__init__.py +++ b/core/amber/src/main/python/pytexera/__init__.py @@ -12,6 +12,7 @@ ) __all__ = [ + "State", "InputExhausted", "Tuple", "TupleLike", diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpDesc.scala index f5387461816..dfa5bc434a8 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpDesc.scala @@ -3,7 +3,7 @@ package edu.uci.ics.texera.workflow.operators.state import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} -import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort} +import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort, PortIdentity} import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} import edu.uci.ics.texera.workflow.common.operators.LogicalOp import edu.uci.ics.texera.workflow.common.tuple.schema.Schema @@ -33,7 +33,7 @@ class DualInputStateReceiverOpDesc extends LogicalOp { "2 in Testing State Receiver", "", OperatorGroupConstants.UTILITY_GROUP, - inputPorts = List(InputPort(), InputPort()), + inputPorts = List(InputPort(PortIdentity(0)), InputPort(PortIdentity(1))), outputPorts = List(OutputPort()), supportReconfiguration = true ) From 4a984b2ed13ff0ee751f7ff51bfd836878f8e192 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 5 Aug 2024 22:06:07 -0700 Subject: [PATCH 038/163] update --- .../architecture/packaging/output_manager.py | 15 +++++++-- .../hash_based_shuffle_partitioner.py | 9 ++++-- .../sendsemantics/one_to_one_partitioner.py | 8 +++-- .../architecture/sendsemantics/partitioner.py | 5 ++- .../sendsemantics/round_robin_partitioner.py | 9 ++++-- .../src/main/python/core/models/state.py | 14 ++++++-- .../python/core/runnables/data_processor.py | 13 +++----- .../main/python/core/runnables/main_loop.py | 32 +++++++++++-------- .../python/core/runnables/network_receiver.py | 1 + .../python/core/runnables/network_sender.py | 7 ++++ .../pythonworker/PythonProxyServer.scala | 8 ++++- .../ics/texera/workflow/common/Marker.scala | 6 ++++ 12 files changed, 93 insertions(+), 34 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/packaging/output_manager.py b/core/amber/src/main/python/core/architecture/packaging/output_manager.py index c64c9b17779..11f25edac57 100644 --- a/core/amber/src/main/python/core/architecture/packaging/output_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/output_manager.py @@ -19,8 +19,8 @@ from core.architecture.sendsemantics.broad_cast_partitioner import ( BroadcastPartitioner, ) -from core.models import Tuple, Schema -from core.models.payload import OutputDataFrame, DataPayload +from core.models import Tuple, Schema, State +from core.models.payload import OutputDataFrame, DataPayload, StateFrame from core.util import get_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import ( HashBasedShufflePartitioning, @@ -93,6 +93,17 @@ def tuple_to_batch( ) ) + def state_to_batch( + self, state: State + ) -> Iterator[typing.Tuple[ActorVirtualIdentity, StateFrame]]: + return chain( + *( + partitioner.add_state_to_batch(state) + for partitioner in self._partitioners.values() + ) + ) + + def emit_end_of_upstream( self, ) -> Iterable[typing.Tuple[ActorVirtualIdentity, DataPayload]]: diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py index 3010a8661b9..fc275c7b101 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py @@ -5,8 +5,8 @@ from overrides import overrides from core.architecture.sendsemantics.partitioner import Partitioner -from core.models import Tuple -from core.models.payload import OutputDataFrame, DataPayload, EndOfUpstream +from core.models import Tuple, State +from core.models.payload import OutputDataFrame, DataPayload, EndOfUpstream, StateFrame from core.util import set_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import ( HashBasedShufflePartitioning, @@ -42,6 +42,11 @@ def add_tuple_to_batch( yield receiver, OutputDataFrame(frame=batch) self.receivers[hash_code] = (receiver, list()) + @overrides + def add_state_to_batch(self, state: State): + for receiver, batch in self.receivers: + yield receiver, StateFrame(frame=state.to_table()) + @overrides def no_more(self) -> Iterator[typing.Tuple[ActorVirtualIdentity, DataPayload]]: for receiver, batch in self.receivers: diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py index dc5bfb225a6..e8750b5bde8 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py @@ -4,8 +4,8 @@ from overrides import overrides from core.architecture.sendsemantics.partitioner import Partitioner -from core.models import Tuple -from core.models.payload import OutputDataFrame, DataPayload, EndOfUpstream +from core.models import Tuple, State +from core.models.payload import OutputDataFrame, DataPayload, EndOfUpstream, StateFrame from core.util import set_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import ( OneToOnePartitioning, @@ -33,6 +33,10 @@ def add_tuple_to_batch( yield self.receiver, OutputDataFrame(frame=self.batch) self.reset() + @overrides + def add_state_to_batch(self, state: State): + yield self.receiver, StateFrame(frame=state.to_table()) + @overrides def no_more(self) -> Iterator[typing.Tuple[ActorVirtualIdentity, DataPayload]]: if len(self.batch) > 0: diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py index 0c6316995e4..6604c0b5da7 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py @@ -4,7 +4,7 @@ from betterproto import Message -from core.models import Tuple +from core.models import Tuple, State from core.models.payload import OutputDataFrame, DataPayload from core.util import get_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import Partitioning @@ -20,6 +20,9 @@ def add_tuple_to_batch( ) -> Iterator[typing.Tuple[ActorVirtualIdentity, OutputDataFrame]]: pass + def add_state_to_batch(self, state: State): + pass + def no_more(self) -> Iterator[typing.Tuple[ActorVirtualIdentity, DataPayload]]: pass diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py index 78c53260001..a4ad71a0a00 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py @@ -4,8 +4,8 @@ from overrides import overrides from core.architecture.sendsemantics.partitioner import Partitioner -from core.models import Tuple -from core.models.payload import OutputDataFrame, DataPayload, EndOfUpstream +from core.models import Tuple, State +from core.models.payload import OutputDataFrame, DataPayload, EndOfUpstream, StateFrame from core.util import set_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import ( Partitioning, @@ -35,6 +35,11 @@ def add_tuple_to_batch( self.receivers[self.round_robin_index] = (receiver, list()) self.round_robin_index = (self.round_robin_index + 1) % len(self.receivers) + @overrides + def add_state_to_batch(self, state: State): + for receiver, batch in self.receivers: + yield receiver, StateFrame(frame=state.to_table()) + @overrides def no_more(self) -> Iterator[typing.Tuple[ActorVirtualIdentity, DataPayload]]: for receiver, batch in self.receivers: diff --git a/core/amber/src/main/python/core/models/state.py b/core/amber/src/main/python/core/models/state.py index 079584d9a10..3fdd64a2269 100644 --- a/core/amber/src/main/python/core/models/state.py +++ b/core/amber/src/main/python/core/models/state.py @@ -1,3 +1,5 @@ +import pyarrow as pa + class State: def __init__(self, data): self.data = data @@ -8,5 +10,13 @@ def __setitem__(self, key, value): def __getitem__(self, key): return self.data[key] - def __repr__(self): - return str(self.data) \ No newline at end of file + def __str__(self) -> str: + content = ", ".join( + [repr(key) + ": " + repr(value) for key, value in self.data.items()] + ) + return f"State[{content}]" + + __repr__ = __str__ + + def to_table(self): + return pa.Table.from_pydict(self.data) \ No newline at end of file diff --git a/core/amber/src/main/python/core/runnables/data_processor.py b/core/amber/src/main/python/core/runnables/data_processor.py index 52e07660b6b..230bb5b2fb2 100644 --- a/core/amber/src/main/python/core/runnables/data_processor.py +++ b/core/amber/src/main/python/core/runnables/data_processor.py @@ -7,7 +7,7 @@ from loguru import logger from core.architecture.managers import Context -from core.models import Tuple, ExceptionInfo +from core.models import Tuple, ExceptionInfo, State from core.models.table import all_output_to_tuple from core.util import Stoppable from core.util.console_message.replace_print import replace_print @@ -45,17 +45,11 @@ def process_state(self) -> None: else: port = port_id.id - with replace_print( self._context.worker_id, self._context.console_message_manager.print_buf, ): - output_iterator = executor.process_state(state_, port) - #for output in output_iterator: - #for output_tuple in all_output_to_tuple(output): - #self._set_output_tuple(output_tuple) - #self._switch_context() - + self._set_output_state(executor.process_state(state_, port)) except Exception as err: logger.exception(err) @@ -112,6 +106,9 @@ def _set_output_tuple(self, output_tuple): output_tuple.finalize(self._context.output_manager.get_port().get_schema()) self._context.tuple_processing_manager.current_output_tuple = output_tuple + def _set_output_state(self, output_state: State): + self._context.tuple_processing_manager.current_output_state = output_state + def _switch_context(self) -> None: """ Notify the MainLoop thread and wait here until being switched back. diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index 536787beea6..ad109d77ae6 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -20,6 +20,7 @@ Tuple, ) from core.models.internal_queue import DataElement, ControlElement +from core.models.payload import StateFrame from core.models.state import State from core.runnables.data_processor import DataProcessor from core.util import StoppableQueueBlockingRunnable, get_one_of, set_one_of @@ -161,18 +162,19 @@ def process_input_tuple(self) -> None: self.context.tuple_processing_manager.current_input_port_id ) - for output_tuple in self.process_tuple_with_udf(): + for output_data in self.process_tuple_with_udf(): self._check_and_process_control() - if output_tuple is not None: - self.context.statistics_manager.increase_output_tuple_count( - PortIdentity(0) - ) - for ( - to, - batch, - ) in self.context.output_manager.tuple_to_batch(output_tuple): - batch.schema = self.context.output_manager.get_port().get_schema() - self._output_queue.put(DataElement(tag=to, payload=batch)) + if output_data is not None: + if isinstance(output_data, Tuple): + self.context.statistics_manager.increase_output_tuple_count( + PortIdentity(0) + ) + for (to, batch) in self.context.output_manager.tuple_to_batch(output_data): + batch.schema = self.context.output_manager.get_port().get_schema() + self._output_queue.put(DataElement(tag=to, payload=batch)) + elif isinstance(output_data, State): + for (to, batch) in self.context.output_manager.state_to_batch(output_data): + self._output_queue.put(DataElement(tag=to, payload=batch)) def process_tuple_with_udf(self) -> Iterator[Optional[Tuple]]: """ @@ -189,6 +191,9 @@ def process_tuple_with_udf(self) -> Iterator[Optional[Tuple]]: self._check_and_process_control() self._switch_context() yield self.context.tuple_processing_manager.get_output_tuple() + self._check_and_process_control() + self._switch_context() + yield self.context.tuple_processing_manager.get_output_state() def _process_control_element(self, control_element: ControlElement) -> None: """ @@ -203,11 +208,10 @@ def _process_tuple(self, tuple_: Union[Tuple, InputExhausted]) -> None: self.process_input_tuple() self._check_and_process_control() - def _process_state(self, state: State): - self.context.tuple_processing_manager.current_input_state = state + def _process_state(self, state_: State): + self.context.tuple_processing_manager.current_input_state = state_ self._check_and_process_control() self._switch_context() - return self.context.tuple_processing_manager.get_output_tuple() def _process_input_exhausted(self, input_exhausted: InputExhausted): self._process_tuple(input_exhausted) diff --git a/core/amber/src/main/python/core/runnables/network_receiver.py b/core/amber/src/main/python/core/runnables/network_receiver.py index 79e63459ecf..675cdc0c984 100644 --- a/core/amber/src/main/python/core/runnables/network_receiver.py +++ b/core/amber/src/main/python/core/runnables/network_receiver.py @@ -59,6 +59,7 @@ def data_handler(command: bytes, table: Table) -> int: :return: sender credits """ data_header = PythonDataHeader().parse(command) + print("data_header:", data_header.marker) shared_queue.put( DataElement( tag=data_header.tag, diff --git a/core/amber/src/main/python/core/runnables/network_sender.py b/core/amber/src/main/python/core/runnables/network_sender.py index 483b8f83a29..ce96496a075 100644 --- a/core/amber/src/main/python/core/runnables/network_sender.py +++ b/core/amber/src/main/python/core/runnables/network_sender.py @@ -6,6 +6,7 @@ from core.models import OutputDataFrame, DataPayload, EndOfUpstream, InternalQueue, InputDataFrame from core.models.internal_queue import InternalQueueElement, DataElement, ControlElement +from core.models.payload import StateFrame from core.proxy import ProxyClient from core.util import StoppableQueueBlockingRunnable from proto.edu.uci.ics.amber.engine.common import ( @@ -68,6 +69,12 @@ def _send_data(self, to: ActorVirtualIdentity, data_payload: DataPayload) -> Non data_header = PythonDataHeader(tag=to, marker=EndOfUpstream.__name__) self._proxy_client.send_data(bytes(data_header), None) # returns credits + elif isinstance(data_payload, StateFrame): + data_header = PythonDataHeader(tag=to, marker=StateFrame.__name__) + table = data_payload.frame + + self._proxy_client.send_data(bytes(data_header), table) + else: raise TypeError(f"Unexpected payload {data_payload}") diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala index dd2961cefae..c1b99b27b0e 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala @@ -20,7 +20,7 @@ import java.net.ServerSocket import java.util.concurrent.atomic.AtomicInteger import scala.collection.mutable import com.twitter.util.Promise -import edu.uci.ics.texera.workflow.common.EndOfUpstream +import edu.uci.ics.texera.workflow.common.{EndOfUpstream, State} import java.nio.charset.Charset @@ -106,6 +106,12 @@ private class AmberProducer( if (dataHeader.marker == EndOfUpstream().getClass.getSimpleName) { assert(root.getRowCount == 0) outputPort.sendTo(to, MarkerFrame(EndOfUpstream())) + } + if (dataHeader.marker == "StateFrame") { + //val state = State() + //state.fromTuple(ArrowUtils.getTexeraTuple(0, root)) + //println("wefwefwef", state) + //outputPort.sendTo(to, MarkerFrame(state)) } else { // normal data batches val queue = mutable.Queue[Tuple]() diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index c97fe6212f1..a368b80987c 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -33,6 +33,12 @@ final case class State() extends Marker { .addSequentially(list.values.map(_._2).toArray) .build() + def fromTuple(tuple: Tuple): Unit = { + tuple.getSchema.getAttributes.foreach { attribute => + add(attribute.getName, attribute.getType, tuple.getField(attribute.getName)) + } + } + override def toString: String = list.map { case (key, (_, value)) => s"$key: $value" }.mkString(", ") } From 85a7e07f56c12de4a7ea8f04378c24c6a9c4b1b1 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 5 Aug 2024 22:49:48 -0700 Subject: [PATCH 039/163] update --- .../src/main/python/core/runnables/network_sender.py | 1 - .../architecture/pythonworker/PythonProxyServer.scala | 11 +++++------ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/core/amber/src/main/python/core/runnables/network_sender.py b/core/amber/src/main/python/core/runnables/network_sender.py index ce96496a075..5a689467c4c 100644 --- a/core/amber/src/main/python/core/runnables/network_sender.py +++ b/core/amber/src/main/python/core/runnables/network_sender.py @@ -72,7 +72,6 @@ def _send_data(self, to: ActorVirtualIdentity, data_payload: DataPayload) -> Non elif isinstance(data_payload, StateFrame): data_header = PythonDataHeader(tag=to, marker=StateFrame.__name__) table = data_payload.frame - self._proxy_client.send_data(bytes(data_header), table) else: diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala index c1b99b27b0e..b768353afff 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala @@ -106,12 +106,11 @@ private class AmberProducer( if (dataHeader.marker == EndOfUpstream().getClass.getSimpleName) { assert(root.getRowCount == 0) outputPort.sendTo(to, MarkerFrame(EndOfUpstream())) - } - if (dataHeader.marker == "StateFrame") { - //val state = State() - //state.fromTuple(ArrowUtils.getTexeraTuple(0, root)) - //println("wefwefwef", state) - //outputPort.sendTo(to, MarkerFrame(state)) + } else if (dataHeader.marker == "StateFrame") { + val state = State() + state.fromTuple(ArrowUtils.getTexeraTuple(0, root)) + println("wefwefwef", state) + outputPort.sendTo(to, MarkerFrame(state)) } else { // normal data batches val queue = mutable.Queue[Tuple]() From 03a44f6d345559201d24366422b9666ae8f81dc0 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 5 Aug 2024 23:45:20 -0700 Subject: [PATCH 040/163] update --- .../src/main/python/core/models/state.py | 6 ++- .../python/core/runnables/data_processor.py | 51 ++++++++++--------- 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/core/amber/src/main/python/core/models/state.py b/core/amber/src/main/python/core/models/state.py index 3fdd64a2269..250c0e18f9a 100644 --- a/core/amber/src/main/python/core/models/state.py +++ b/core/amber/src/main/python/core/models/state.py @@ -1,4 +1,6 @@ -import pyarrow as pa +from pyarrow import Table +from pandas import DataFrame + class State: def __init__(self, data): @@ -19,4 +21,4 @@ def __str__(self) -> str: __repr__ = __str__ def to_table(self): - return pa.Table.from_pydict(self.data) \ No newline at end of file + return Table.from_pandas(DataFrame([self.data])) diff --git a/core/amber/src/main/python/core/runnables/data_processor.py b/core/amber/src/main/python/core/runnables/data_processor.py index 230bb5b2fb2..ac600ec6cae 100644 --- a/core/amber/src/main/python/core/runnables/data_processor.py +++ b/core/amber/src/main/python/core/runnables/data_processor.py @@ -34,31 +34,32 @@ def run(self) -> None: self._switch_context() def process_state(self) -> None: - try: - executor = self._context.executor_manager.executor - state_ = self._context.tuple_processing_manager.get_input_state() - port_id = self._context.tuple_processing_manager.current_input_port_id - port: int - if port_id is None: - # no upstream, special case for source executor. - port = 0 - else: - port = port_id.id - - with replace_print( - self._context.worker_id, - self._context.console_message_manager.print_buf, - ): - self._set_output_state(executor.process_state(state_, port)) - - except Exception as err: - logger.exception(err) - exc_info = sys.exc_info() - self._context.exception_manager.set_exception_info(exc_info) - self._report_exception(exc_info) - - finally: - self._switch_context() + state_ = self._context.tuple_processing_manager.get_input_state() + if state_ is not None: + try: + executor = self._context.executor_manager.executor + port_id = self._context.tuple_processing_manager.current_input_port_id + port: int + if port_id is None: + # no upstream, special case for source executor. + port = 0 + else: + port = port_id.id + + with replace_print( + self._context.worker_id, + self._context.console_message_manager.print_buf, + ): + self._set_output_state(executor.process_state(state_, port)) + + except Exception as err: + logger.exception(err) + exc_info = sys.exc_info() + self._context.exception_manager.set_exception_info(exc_info) + self._report_exception(exc_info) + + finally: + self._switch_context() def process_tuple(self) -> None: finished_current = self._context.tuple_processing_manager.finished_current From a5a27f61192cc3085298fd900abb7c9298ee99da Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 5 Aug 2024 23:47:51 -0700 Subject: [PATCH 041/163] update --- .../engine/architecture/pythonworker/PythonProxyServer.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala index b768353afff..798fec9a43e 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala @@ -109,7 +109,6 @@ private class AmberProducer( } else if (dataHeader.marker == "StateFrame") { val state = State() state.fromTuple(ArrowUtils.getTexeraTuple(0, root)) - println("wefwefwef", state) outputPort.sendTo(to, MarkerFrame(state)) } else { // normal data batches From dd623cff1e4d5a7831d95acfecbba95db144bd84 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 5 Aug 2024 23:49:03 -0700 Subject: [PATCH 042/163] update --- .../engine/architecture/pythonworker/PythonProxyServer.scala | 4 +--- .../scala/edu/uci/ics/texera/workflow/common/Marker.scala | 3 ++- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala index 798fec9a43e..44022bc0c23 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala @@ -107,9 +107,7 @@ private class AmberProducer( assert(root.getRowCount == 0) outputPort.sendTo(to, MarkerFrame(EndOfUpstream())) } else if (dataHeader.marker == "StateFrame") { - val state = State() - state.fromTuple(ArrowUtils.getTexeraTuple(0, root)) - outputPort.sendTo(to, MarkerFrame(state)) + outputPort.sendTo(to, MarkerFrame(State().fromTuple(ArrowUtils.getTexeraTuple(0, root)))) } else { // normal data batches val queue = mutable.Queue[Tuple]() diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index a368b80987c..6fca9d37579 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -33,10 +33,11 @@ final case class State() extends Marker { .addSequentially(list.values.map(_._2).toArray) .build() - def fromTuple(tuple: Tuple): Unit = { + def fromTuple(tuple: Tuple): State = { tuple.getSchema.getAttributes.foreach { attribute => add(attribute.getName, attribute.getType, tuple.getField(attribute.getName)) } + this } override def toString: String = From ab2f9b08b4e7c0d60593795a90a6be1012d141c6 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 5 Aug 2024 23:49:20 -0700 Subject: [PATCH 043/163] update --- .../engine/architecture/pythonworker/PythonProxyServer.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala index 44022bc0c23..6dc4c0cab53 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala @@ -107,6 +107,7 @@ private class AmberProducer( assert(root.getRowCount == 0) outputPort.sendTo(to, MarkerFrame(EndOfUpstream())) } else if (dataHeader.marker == "StateFrame") { + assert(root.getRowCount == 1) outputPort.sendTo(to, MarkerFrame(State().fromTuple(ArrowUtils.getTexeraTuple(0, root)))) } else { // normal data batches From eed34337cd75aef664a27491cc1083a9063cdd7e Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Tue, 6 Aug 2024 02:02:45 -0700 Subject: [PATCH 044/163] update --- core/amber/src/main/python/core/models/operator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/python/core/models/operator.py b/core/amber/src/main/python/core/models/operator.py index c1f4a79151d..e7790ff941f 100644 --- a/core/amber/src/main/python/core/models/operator.py +++ b/core/amber/src/main/python/core/models/operator.py @@ -56,7 +56,7 @@ def process_state(self, state_: State, port: int) -> State: :param port: int, input port index of the current exhausted port. :return: State, producing one State object """ - pass + return state_ class TupleOperatorV2(Operator): From ca8d36b831d07bda6565d5e1fd34b67636180b6a Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Tue, 6 Aug 2024 22:32:09 -0700 Subject: [PATCH 045/163] update --- .../core/architecture/managers/tuple_processing_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py index c391daa40b3..bf9b0e1e240 100644 --- a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py +++ b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py @@ -29,4 +29,4 @@ def get_output_state(self) -> Optional[State]: def get_input_state(self) -> Optional[State]: ret, self.current_input_state = self.current_input_state, None - return ret \ No newline at end of file + return ret From cb5eecb0b1f981aa20c8c3e5d4addce5d7ff5d35 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 7 Aug 2024 01:25:45 -0700 Subject: [PATCH 046/163] update --- .../sendsemantics/hash_based_shuffle_partitioner.py | 4 ++++ .../core/architecture/sendsemantics/one_to_one_partitioner.py | 4 ++++ .../architecture/sendsemantics/round_robin_partitioner.py | 3 +++ core/amber/src/main/python/core/runnables/main_loop.py | 4 +++- 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py index fc275c7b101..605e54025c5 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py @@ -45,6 +45,10 @@ def add_tuple_to_batch( @overrides def add_state_to_batch(self, state: State): for receiver, batch in self.receivers: + if len(batch) > 0: + yield receiver, OutputDataFrame(frame=batch) + yield receiver, OutputDataFrame(frame=batch) + batch.clear() yield receiver, StateFrame(frame=state.to_table()) @overrides diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py index e8750b5bde8..5503797672f 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py @@ -35,6 +35,10 @@ def add_tuple_to_batch( @overrides def add_state_to_batch(self, state: State): + if len(self.batch) > 0: + yield self.receiver, OutputDataFrame(frame=self.batch) + self.batch.clear() + yield self.receiver, StateFrame(frame=state.to_table()) @overrides diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py index a4ad71a0a00..64af1bed1c6 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py @@ -38,6 +38,9 @@ def add_tuple_to_batch( @overrides def add_state_to_batch(self, state: State): for receiver, batch in self.receivers: + if len(batch) > 0: + yield receiver, OutputDataFrame(frame=batch) + batch.clear() yield receiver, StateFrame(frame=state.to_table()) @overrides diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index ad109d77ae6..86a91aa9273 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -20,7 +20,7 @@ Tuple, ) from core.models.internal_queue import DataElement, ControlElement -from core.models.payload import StateFrame +from core.models.payload import OutputDataFrame from core.models.state import State from core.runnables.data_processor import DataProcessor from core.util import StoppableQueueBlockingRunnable, get_one_of, set_one_of @@ -174,6 +174,8 @@ def process_input_tuple(self) -> None: self._output_queue.put(DataElement(tag=to, payload=batch)) elif isinstance(output_data, State): for (to, batch) in self.context.output_manager.state_to_batch(output_data): + if isinstance(batch, OutputDataFrame): + batch.schema = self.context.output_manager.get_port().get_schema() self._output_queue.put(DataElement(tag=to, payload=batch)) def process_tuple_with_udf(self) -> Iterator[Optional[Tuple]]: From 30d46957379a3e26d317701e5e7a316ff2a9a28e Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 7 Aug 2024 01:39:55 -0700 Subject: [PATCH 047/163] update --- .../sendsemantics/hash_based_shuffle_partitioner.py | 6 +++--- .../architecture/sendsemantics/one_to_one_partitioner.py | 3 ++- .../architecture/sendsemantics/round_robin_partitioner.py | 3 ++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py index 605e54025c5..adca89b28b1 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py @@ -3,7 +3,7 @@ from loguru import logger from overrides import overrides - +from copy import deepcopy from core.architecture.sendsemantics.partitioner import Partitioner from core.models import Tuple, State from core.models.payload import OutputDataFrame, DataPayload, EndOfUpstream, StateFrame @@ -46,8 +46,8 @@ def add_tuple_to_batch( def add_state_to_batch(self, state: State): for receiver, batch in self.receivers: if len(batch) > 0: - yield receiver, OutputDataFrame(frame=batch) - yield receiver, OutputDataFrame(frame=batch) + yield receiver, OutputDataFrame(frame=deepcopy(batch)) + yield receiver, OutputDataFrame(frame=deepcopy(batch)) batch.clear() yield receiver, StateFrame(frame=state.to_table()) diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py index 5503797672f..a2fd8b54880 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py @@ -3,6 +3,7 @@ from overrides import overrides +from copy import deepcopy from core.architecture.sendsemantics.partitioner import Partitioner from core.models import Tuple, State from core.models.payload import OutputDataFrame, DataPayload, EndOfUpstream, StateFrame @@ -36,7 +37,7 @@ def add_tuple_to_batch( @overrides def add_state_to_batch(self, state: State): if len(self.batch) > 0: - yield self.receiver, OutputDataFrame(frame=self.batch) + yield self.receiver, OutputDataFrame(frame=deepcopy(self.batch)) self.batch.clear() yield self.receiver, StateFrame(frame=state.to_table()) diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py index 64af1bed1c6..4c322293455 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py @@ -3,6 +3,7 @@ from overrides import overrides +from copy import deepcopy from core.architecture.sendsemantics.partitioner import Partitioner from core.models import Tuple, State from core.models.payload import OutputDataFrame, DataPayload, EndOfUpstream, StateFrame @@ -39,7 +40,7 @@ def add_tuple_to_batch( def add_state_to_batch(self, state: State): for receiver, batch in self.receivers: if len(batch) > 0: - yield receiver, OutputDataFrame(frame=batch) + yield receiver, OutputDataFrame(frame=deepcopy(batch)) batch.clear() yield receiver, StateFrame(frame=state.to_table()) From 4822bbec12007d5b080f35ddb50980b0d9b57572 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 7 Aug 2024 02:10:54 -0700 Subject: [PATCH 048/163] update --- .../workflow/common/operators/LogicalOp.scala | 11 ++--- .../operators/state/DataToStateOpDesc.scala | 43 +++++++++++++++++++ .../operators/state/DataToStateOpExec.scala | 28 ++++++++++++ 3 files changed, 77 insertions(+), 5 deletions(-) create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala index 314467c113e..3d6180f3fc4 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala @@ -50,7 +50,7 @@ import edu.uci.ics.texera.workflow.operators.source.sql.mysql.MySQLSourceOpDesc import edu.uci.ics.texera.workflow.operators.source.sql.postgresql.PostgreSQLSourceOpDesc import edu.uci.ics.texera.workflow.operators.split.SplitOpDesc import edu.uci.ics.texera.workflow.operators.symmetricDifference.SymmetricDifferenceOpDesc -import edu.uci.ics.texera.workflow.operators.state.{DualInputStateReceiverOpDesc, StateReceiverOpDesc, StateSender1OpDesc, StateSender2OpDesc} +import edu.uci.ics.texera.workflow.operators.state.{DualInputStateReceiverOpDesc, StateReceiverOpDesc, StateSender1OpDesc, StateSender2OpDesc, DataToStateOpDesc} import edu.uci.ics.texera.workflow.operators.typecasting.TypeCastingOpDesc import edu.uci.ics.texera.workflow.operators.udf.java.JavaUDFOpDesc import edu.uci.ics.texera.workflow.operators.udf.python.source.PythonUDFSourceOpDescV2 @@ -147,10 +147,11 @@ trait StateTransferFunc new Type(value = classOf[AsterixDBSourceOpDesc], name = "AsterixDBSource"), new Type(value = classOf[TypeCastingOpDesc], name = "TypeCasting"), new Type(value = classOf[LimitOpDesc], name = "Limit"), - new Type(value = classOf[StateSender1OpDesc], name = "State Sender1"), - new Type(value = classOf[StateSender2OpDesc], name = "State Sender2"), - new Type(value = classOf[StateReceiverOpDesc], name = "Testing State Receiver"), - new Type(value = classOf[DualInputStateReceiverOpDesc], name = "Dual Input Testing State Receiver"), + new Type(value = classOf[DataToStateOpDesc], name = "DataToState"), + new Type(value = classOf[StateSender1OpDesc], name = "StateSender1"), + new Type(value = classOf[StateSender2OpDesc], name = "State ender2"), + new Type(value = classOf[StateReceiverOpDesc], name = "TestingStateReceiver"), + new Type(value = classOf[DualInputStateReceiverOpDesc], name = "DualInputTestingStateReceiver"), new Type(value = classOf[RandomKSamplingOpDesc], name = "RandomKSampling"), new Type(value = classOf[ReservoirSamplingOpDesc], name = "ReservoirSampling"), new Type(value = classOf[HashJoinOpDesc[String]], name = "HashJoin"), diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala new file mode 100644 index 00000000000..1466f3958e6 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala @@ -0,0 +1,43 @@ +package edu.uci.ics.texera.workflow.operators.state + +import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp +import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo +import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} +import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort, PortIdentity} +import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} +import edu.uci.ics.texera.workflow.common.operators.LogicalOp +import edu.uci.ics.texera.workflow.common.tuple.schema.Schema + +class DataToStateOpDesc extends LogicalOp { + override def getPhysicalOp( + workflowId: WorkflowIdentity, + executionId: ExecutionIdentity + ): PhysicalOp = { + PhysicalOp + .oneToOnePhysicalOp( + workflowId, + executionId, + operatorIdentifier, + OpExecInitInfo((_, _) => { + new DataToStateOpExec() + }) + ) + .withInputPorts(operatorInfo.inputPorts) + .withOutputPorts(operatorInfo.outputPorts) + .withParallelizable(false) + } + + override def operatorInfo: OperatorInfo = + OperatorInfo( + "Data To State", + "Convert Data to State", + OperatorGroupConstants.UTILITY_GROUP, + inputPorts = List( + InputPort(PortIdentity(), "State"), + InputPort(PortIdentity(1), "Data", dependencies = List(PortIdentity())) + ), + outputPorts = List(OutputPort()) + ) + + override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(0) +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala new file mode 100644 index 00000000000..952797a1fcc --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala @@ -0,0 +1,28 @@ +package edu.uci.ics.texera.workflow.operators.state + +import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike +import edu.uci.ics.texera.workflow.common.State +import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor +import edu.uci.ics.texera.workflow.common.tuple.Tuple + +import scala.collection.mutable + +class DataToStateOpExec extends OperatorExecutor { + private val buffer = new mutable.ArrayBuffer[Tuple]() + private var stateTuple: Tuple = _ + + override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { + port match { + case 0 => + if (stateTuple == null) + stateTuple = tuple + case 1 => + buffer += tuple + } + Iterator() + } + + override def produceState(): State = State().fromTuple(stateTuple) + + override def onFinish(port: Int): Iterator[TupleLike] = buffer.iterator +} From 79856ba5439feea24401587b815a9d9394b4cf4b Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 7 Aug 2024 04:23:57 -0700 Subject: [PATCH 049/163] update --- .../workflow/common/operators/LogicalOp.scala | 4 +- .../operators/state/StateSender1OpDesc.scala | 41 ------------------- .../operators/state/StateSender1OpExec.scala | 33 --------------- .../operators/state/StateSender2OpDesc.scala | 41 ------------------- .../operators/state/StateSender2OpExec.scala | 34 --------------- 5 files changed, 1 insertion(+), 152 deletions(-) delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender1OpDesc.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender1OpExec.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender2OpDesc.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender2OpExec.scala diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala index 3d6180f3fc4..b812ef1d7ef 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala @@ -50,7 +50,7 @@ import edu.uci.ics.texera.workflow.operators.source.sql.mysql.MySQLSourceOpDesc import edu.uci.ics.texera.workflow.operators.source.sql.postgresql.PostgreSQLSourceOpDesc import edu.uci.ics.texera.workflow.operators.split.SplitOpDesc import edu.uci.ics.texera.workflow.operators.symmetricDifference.SymmetricDifferenceOpDesc -import edu.uci.ics.texera.workflow.operators.state.{DualInputStateReceiverOpDesc, StateReceiverOpDesc, StateSender1OpDesc, StateSender2OpDesc, DataToStateOpDesc} +import edu.uci.ics.texera.workflow.operators.state.{DualInputStateReceiverOpDesc, StateReceiverOpDesc, DataToStateOpDesc} import edu.uci.ics.texera.workflow.operators.typecasting.TypeCastingOpDesc import edu.uci.ics.texera.workflow.operators.udf.java.JavaUDFOpDesc import edu.uci.ics.texera.workflow.operators.udf.python.source.PythonUDFSourceOpDescV2 @@ -148,8 +148,6 @@ trait StateTransferFunc new Type(value = classOf[TypeCastingOpDesc], name = "TypeCasting"), new Type(value = classOf[LimitOpDesc], name = "Limit"), new Type(value = classOf[DataToStateOpDesc], name = "DataToState"), - new Type(value = classOf[StateSender1OpDesc], name = "StateSender1"), - new Type(value = classOf[StateSender2OpDesc], name = "State ender2"), new Type(value = classOf[StateReceiverOpDesc], name = "TestingStateReceiver"), new Type(value = classOf[DualInputStateReceiverOpDesc], name = "DualInputTestingStateReceiver"), new Type(value = classOf[RandomKSamplingOpDesc], name = "RandomKSampling"), diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender1OpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender1OpDesc.scala deleted file mode 100644 index 8c81ab1133d..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender1OpDesc.scala +++ /dev/null @@ -1,41 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp -import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo -import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} -import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort} -import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} -import edu.uci.ics.texera.workflow.common.operators.LogicalOp -import edu.uci.ics.texera.workflow.common.tuple.schema.Schema - -class StateSender1OpDesc extends LogicalOp { - override def getPhysicalOp( - workflowId: WorkflowIdentity, - executionId: ExecutionIdentity - ): PhysicalOp = { - PhysicalOp - .oneToOnePhysicalOp( - workflowId, - executionId, - operatorIdentifier, - OpExecInitInfo((_, _) => { - new StateSender1OpExec() - }) - ) - .withInputPorts(operatorInfo.inputPorts) - .withOutputPorts(operatorInfo.outputPorts) - .withParallelizable(false) - } - - override def operatorInfo: OperatorInfo = - OperatorInfo( - "State Sender A", - "Append State to Data", - OperatorGroupConstants.UTILITY_GROUP, - inputPorts = List(InputPort()), - outputPorts = List(OutputPort()), - supportReconfiguration = true - ) - - override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(0) -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender1OpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender1OpExec.scala deleted file mode 100644 index da6e8458d28..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender1OpExec.scala +++ /dev/null @@ -1,33 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike -import edu.uci.ics.texera.workflow.common.State -import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor -import edu.uci.ics.texera.workflow.common.tuple.Tuple -import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType -import scala.collection.mutable - -class StateSender1OpExec extends OperatorExecutor { - var buffer = new mutable.ArrayBuffer[Tuple]() - - override def processState(state: State, port: Int): State = { - val state = State() - state.add("state1 from A1", AttributeType.STRING, "before process tuple") - state - } - - override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { - buffer += tuple - Iterator(tuple) - } - - override def produceState(): State = { - val state = State() - state.add("state2 from A1", AttributeType.STRING, "after process tuple") - state - } - - override def onFinish(port: Int): Iterator[TupleLike] = { - buffer.iterator - } -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender2OpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender2OpDesc.scala deleted file mode 100644 index 160a6ab1a64..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender2OpDesc.scala +++ /dev/null @@ -1,41 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp -import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo -import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} -import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort} -import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} -import edu.uci.ics.texera.workflow.common.operators.LogicalOp -import edu.uci.ics.texera.workflow.common.tuple.schema.Schema - -class StateSender2OpDesc extends LogicalOp { - override def getPhysicalOp( - workflowId: WorkflowIdentity, - executionId: ExecutionIdentity - ): PhysicalOp = { - PhysicalOp - .oneToOnePhysicalOp( - workflowId, - executionId, - operatorIdentifier, - OpExecInitInfo((_, _) => { - new StateSender2OpExec() - }) - ) - .withInputPorts(operatorInfo.inputPorts) - .withOutputPorts(operatorInfo.outputPorts) - .withParallelizable(false) - } - - override def operatorInfo: OperatorInfo = - OperatorInfo( - "State Sender B", - "Append State to Data", - OperatorGroupConstants.UTILITY_GROUP, - inputPorts = List(InputPort()), - outputPorts = List(OutputPort()), - supportReconfiguration = true - ) - - override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(0) -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender2OpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender2OpExec.scala deleted file mode 100644 index df61aac37f5..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateSender2OpExec.scala +++ /dev/null @@ -1,34 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike -import edu.uci.ics.texera.workflow.common.State -import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor -import edu.uci.ics.texera.workflow.common.tuple.Tuple -import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType - -import scala.collection.mutable - -class StateSender2OpExec extends OperatorExecutor { - var buffer = new mutable.ArrayBuffer[Tuple]() - - override def processState(state: State, port: Int): State = { - val state = State() - state.add("state1 from A2", AttributeType.STRING, "before process tuple") - state - } - - override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { - buffer += tuple - Iterator(tuple) - } - - override def produceState(): State = { - val state = State() - state.add("state2 from A2", AttributeType.STRING, "after process tuple") - state - } - - override def onFinish(port: Int): Iterator[TupleLike] = { - buffer.iterator - } -} From ca02b6ddd3f7abe4a774c7dbaf542020bd8fde33 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 7 Aug 2024 04:57:26 -0700 Subject: [PATCH 050/163] update --- .../ics/amber/engine/architecture/worker/DataProcessor.scala | 2 +- .../workflow/operators/hashJoin/HashJoinBuildOpExec.scala | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index 8289348e9f0..a9fb39d899b 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -105,7 +105,7 @@ class DataProcessor( outputManager.outputIterator.setTupleOutput( executor.onInputFinishMultiPort(port) ) - val outputState = executor.produceState() + val outputState = executor.produceState(port) if (outputState!= null) { outputManager.emitMarker(outputState) } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala index 697da47334d..e6f294de4a5 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala @@ -21,7 +21,7 @@ class HashJoinBuildOpExec[K](buildAttributeName: String) extends OperatorExecuto } - override def produceState(): State = { + override def produceState(prot: Int): State = { val state = State() state.add("hashtable", AttributeType.ANY, buildTableHashMap) state From 1122cafbe05b607b5c56706221da4f8f241d1946 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 7 Aug 2024 04:57:47 -0700 Subject: [PATCH 051/163] update --- .../ics/texera/workflow/common/operators/OperatorExecutor.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala index c571f8e2f9a..89a46aaf98a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala @@ -20,7 +20,7 @@ trait OperatorExecutor { def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] - def produceState(): State = null + def produceState(port: Int): State = null def onInputFinishMultiPort(port: Int): Iterator[(TupleLike, Option[PortIdentity])] = { onFinish(port).map(t => (t, None)) From 54721017e3d5e71532a1ce5fb0b48960c0f46965 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 7 Aug 2024 05:13:53 -0700 Subject: [PATCH 052/163] update --- .../src/main/python/core/models/state.py | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/core/amber/src/main/python/core/models/state.py b/core/amber/src/main/python/core/models/state.py index 250c0e18f9a..69dc4b73e49 100644 --- a/core/amber/src/main/python/core/models/state.py +++ b/core/amber/src/main/python/core/models/state.py @@ -3,8 +3,22 @@ class State: - def __init__(self, data): - self.data = data + def __init__(self): + self.data = {} + + def add(self, key, value): + self.data[key] = value + + def get(self, key): + return self.data[key] + + def to_table(self): + return Table.from_pandas(df=DataFrame([self.data])) + + def from_dict(self, dictionary): + for key, value in dictionary.items(): + self.add(key, value) + return self def __setitem__(self, key, value): self.data[key] = value @@ -19,6 +33,3 @@ def __str__(self) -> str: return f"State[{content}]" __repr__ = __str__ - - def to_table(self): - return Table.from_pandas(DataFrame([self.data])) From 234e83b33e688ab8ba8021c99fd65f1fbe7d48bb Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 7 Aug 2024 05:14:21 -0700 Subject: [PATCH 053/163] update --- .../main/python/core/architecture/packaging/input_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index 1eb98bacd27..a6a8f86d5c6 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -97,7 +97,7 @@ def process_data_payload( yield SenderChangeMarker(current_channel_id) if isinstance(payload, StateFrame): - yield State(payload.frame.to_pandas().iloc[0].to_dict()) + yield State().from_dict(payload.frame.to_pandas().iloc[0].to_dict()) elif isinstance(payload, InputDataFrame): for field_accessor in ArrowTableTupleProvider(payload.frame): From 526b49561af26a1c272fb57c39bcfbe8ad360e4d Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 7 Aug 2024 05:14:30 -0700 Subject: [PATCH 054/163] update --- .../ics/texera/workflow/operators/state/DataToStateOpExec.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala index 952797a1fcc..4d465a47028 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala @@ -22,7 +22,7 @@ class DataToStateOpExec extends OperatorExecutor { Iterator() } - override def produceState(): State = State().fromTuple(stateTuple) + override def produceState(port: Int): State = State().fromTuple(stateTuple) override def onFinish(port: Int): Iterator[TupleLike] = buffer.iterator } From 3773ca14a7adec19e6389512a0db8cc2788082a9 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 7 Aug 2024 05:27:41 -0700 Subject: [PATCH 055/163] update --- core/amber/src/main/python/core/models/operator.py | 9 +++++++++ .../src/main/python/core/runnables/data_processor.py | 10 +++++----- .../src/main/python/core/runnables/network_receiver.py | 1 - 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/core/amber/src/main/python/core/models/operator.py b/core/amber/src/main/python/core/models/operator.py index e7790ff941f..603cd3e4517 100644 --- a/core/amber/src/main/python/core/models/operator.py +++ b/core/amber/src/main/python/core/models/operator.py @@ -58,6 +58,15 @@ def process_state(self, state_: State, port: int) -> State: """ return state_ + def produce_state(self, port: int) -> State: + """ + Produce a State after the input port is exhausted. + + :param port: int, input port index of the current exhausted port. + :return: State, producing one State object + """ + pass + class TupleOperatorV2(Operator): """ diff --git a/core/amber/src/main/python/core/runnables/data_processor.py b/core/amber/src/main/python/core/runnables/data_processor.py index ac600ec6cae..8c003a18ab8 100644 --- a/core/amber/src/main/python/core/runnables/data_processor.py +++ b/core/amber/src/main/python/core/runnables/data_processor.py @@ -75,11 +75,11 @@ def process_tuple(self) -> None: else: port = port_id.id - output_iterator = ( - executor.process_tuple(tuple_, port) - if isinstance(tuple_, Tuple) - else executor.on_finish(port) - ) + if isinstance(tuple_, Tuple): + output_iterator = executor.process_tuple(tuple_, port) + else: + self._set_output_state(executor.produce_state(port)) + output_iterator = executor.on_finish(port) with replace_print( self._context.worker_id, self._context.console_message_manager.print_buf, diff --git a/core/amber/src/main/python/core/runnables/network_receiver.py b/core/amber/src/main/python/core/runnables/network_receiver.py index 675cdc0c984..79e63459ecf 100644 --- a/core/amber/src/main/python/core/runnables/network_receiver.py +++ b/core/amber/src/main/python/core/runnables/network_receiver.py @@ -59,7 +59,6 @@ def data_handler(command: bytes, table: Table) -> int: :return: sender credits """ data_header = PythonDataHeader().parse(command) - print("data_header:", data_header.marker) shared_queue.put( DataElement( tag=data_header.tag, From 1ff31d199941f8b96432b22ec80e493089548179 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 7 Aug 2024 23:07:09 -0700 Subject: [PATCH 056/163] update --- .../workflow/common/operators/LogicalOp.scala | 3 +- .../operators/state/StateToDataOpDesc.scala | 61 ++++++++++++++++++ .../operators/state/StateToDataOpExec.scala | 31 +++++++++ .../assets/operator_images/DataToState.png | Bin 0 -> 19569 bytes .../assets/operator_images/StateToData.png | Bin 0 -> 24997 bytes 5 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala create mode 100644 core/gui/src/assets/operator_images/DataToState.png create mode 100644 core/gui/src/assets/operator_images/StateToData.png diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala index b812ef1d7ef..7984f5dbf7c 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala @@ -50,7 +50,7 @@ import edu.uci.ics.texera.workflow.operators.source.sql.mysql.MySQLSourceOpDesc import edu.uci.ics.texera.workflow.operators.source.sql.postgresql.PostgreSQLSourceOpDesc import edu.uci.ics.texera.workflow.operators.split.SplitOpDesc import edu.uci.ics.texera.workflow.operators.symmetricDifference.SymmetricDifferenceOpDesc -import edu.uci.ics.texera.workflow.operators.state.{DualInputStateReceiverOpDesc, StateReceiverOpDesc, DataToStateOpDesc} +import edu.uci.ics.texera.workflow.operators.state.{DataToStateOpDesc, DualInputStateReceiverOpDesc, StateReceiverOpDesc, StateToDataOpDesc} import edu.uci.ics.texera.workflow.operators.typecasting.TypeCastingOpDesc import edu.uci.ics.texera.workflow.operators.udf.java.JavaUDFOpDesc import edu.uci.ics.texera.workflow.operators.udf.python.source.PythonUDFSourceOpDescV2 @@ -148,6 +148,7 @@ trait StateTransferFunc new Type(value = classOf[TypeCastingOpDesc], name = "TypeCasting"), new Type(value = classOf[LimitOpDesc], name = "Limit"), new Type(value = classOf[DataToStateOpDesc], name = "DataToState"), + new Type(value = classOf[StateToDataOpDesc], name = "StateToData"), new Type(value = classOf[StateReceiverOpDesc], name = "TestingStateReceiver"), new Type(value = classOf[DualInputStateReceiverOpDesc], name = "DualInputTestingStateReceiver"), new Type(value = classOf[RandomKSamplingOpDesc], name = "RandomKSampling"), diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala new file mode 100644 index 00000000000..f46f319bf46 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala @@ -0,0 +1,61 @@ +package edu.uci.ics.texera.workflow.operators.state + +import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription} +import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle +import edu.uci.ics.amber.engine.architecture.deploysemantics.{PhysicalOp, SchemaPropagationFunc} +import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo +import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} +import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort, PortIdentity} +import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} +import edu.uci.ics.texera.workflow.common.operators.LogicalOp +import edu.uci.ics.texera.workflow.common.tuple.schema.{Attribute, Schema} + +class StateToDataOpDesc extends LogicalOp { + @JsonProperty + @JsonSchemaTitle("State output column(s)") + @JsonPropertyDescription( + "Name of the newly added output columns that the UDF will produce, if any" + ) + var outputColumns: List[Attribute] = List() + + override def getPhysicalOp( + workflowId: WorkflowIdentity, + executionId: ExecutionIdentity + ): PhysicalOp = { + PhysicalOp + .oneToOnePhysicalOp( + workflowId, + executionId, + operatorIdentifier, + OpExecInitInfo((_, _) => { + new StateToDataOpExec() + }) + ) + .withInputPorts(operatorInfo.inputPorts) + .withOutputPorts(operatorInfo.outputPorts) + .withParallelizable(false) + .withPropagateSchema( + SchemaPropagationFunc(inputSchemas => + operatorInfo.outputPorts + .map(_.id) + .map(id => id -> inputSchemas(operatorInfo.inputPorts.head.id)) + .toMap + ) + ) + } + + override def operatorInfo: OperatorInfo = + OperatorInfo( + "State To Data", + "Convert State to Data", + OperatorGroupConstants.UTILITY_GROUP, + inputPorts = List(InputPort()), + outputPorts = List(OutputPort(PortIdentity(), "State"), OutputPort(PortIdentity(1), "Data")) + ) + + override def getOutputSchema(schemas: Array[Schema]): Schema = throw new NotImplementedError() + + override def getOutputSchemas(schemas: Array[Schema]): Array[Schema] = + Array(Schema.builder().add(outputColumns).build(), schemas(0)) + +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala new file mode 100644 index 00000000000..b619f40eedc --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala @@ -0,0 +1,31 @@ +package edu.uci.ics.texera.workflow.operators.state + +import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike +import edu.uci.ics.amber.engine.common.workflow.PortIdentity +import edu.uci.ics.texera.workflow.common.State +import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor +import edu.uci.ics.texera.workflow.common.tuple.Tuple + +class StateToDataOpExec extends OperatorExecutor { + private var stateTuple: Tuple = _ + + override def processState(state: State, port: Int): State = { + stateTuple = state.toTuple + State() + } + + override def processTupleMultiPort( + tuple: Tuple, + port: Int + ): Iterator[(TupleLike, Option[PortIdentity])] = { + if (stateTuple != null) { + val outputTuple = stateTuple + stateTuple = null + Array((outputTuple, Some(PortIdentity())), (tuple, Some(PortIdentity(1)))).iterator + } else { + Iterator((tuple, Some(PortIdentity(1)))) + } + } + + override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = ??? +} diff --git a/core/gui/src/assets/operator_images/DataToState.png b/core/gui/src/assets/operator_images/DataToState.png new file mode 100644 index 0000000000000000000000000000000000000000..27419e8a612fb2d5b08a302c1f3eada0283d6924 GIT binary patch literal 19569 zcmXV%1yEdFkcA^S6I=#&2|hRk3GVI$cXx;2?(P~qxI=JvcY?bGcis2zE>*}>!7%gQ zz1^q3b8eWttk@R>T!arFK75f77gqf6;iC}nb3Ysm@O7dh)&=sCe8uMjS2*6LmW{{sRd)i9hmG zaVVU-?{_o!j|{^}?_T#VJtoVew|qGldv_-HE~dpVcN}$fbq@K{)P_qF*t%Xd51ZWq zUm_wRFa-nzf~%{mNgFgPluDIhywQ;g70uz0kQRah1MN+Sh#B4j~(5E@&8?2%_Lns zTk2S)UFLGNJ4{t`c2u16W;=>SzhIBY431>hADBY&xIb%uzStn~k!KuhH zhx>R(nGCl4LehNE`99B=lc3bPd1oD(;_n{J&aBtvTLBLbzgBG} zw<6*0vp~8>p($9YJOtuym(HIy!sl`*U?UpSV-lu*kPA-IYIUL4s#G2U!30R>Rr5aT zO_f-QjKq?b({-ct5ciC~99;3^zL|nxgrHc0*%eC|6^|k!xIX7Ep0qHrYX6IyV$!6Q z;<>;B3knXzfuaN}shu4|y7g8=aD?!_Vyjk$>h@)6O_VH0kaad-vF^3(^IU?2goJD_ zyZ`?w(FhfEB9sIC?C-u{c%pdvBY%Hplh6u-w3$5h=rQ#%8Jdx8?e&B~cFXgm3l%T( zbdbWzid4!Xb90IJahSm-dIM9U82;q63=I1zZD7t|WoDcc79*yyuE@N0aW)AYQ1E0= zBuPNG9!e^t9`^05oMZKS4HnP!7$Y-qt-S`oHfuFMR}byaDPHdHH(HAMd4e6&Ov(*du?LaHIGf7Q;}@9s(iKFP9;gbtaQ%T<@b?uQ2Ughxsv- z-fi)^doKR|bKEu`CrB<))*z}=IBqLU7Kn&tUkLe}9AB=pe2(Y#S`yFMtuu$J44B$J zb5k$=NUHXJEyj)74;3s7T;2*i|M$&_lenS1jLo|zFsUF+3k|zLt6QRwnm#Bc6fD4y zJv}`gQaCY4!2Nu`K@exUJLAcZoUcjbcXfSzBJ~;^HoSIaG09~9{jz(SzJPffIHXc{VPryPCBu3wKNV! z-))nLiHSw-!^1;EN|l!Ruoscc$xow)fQOuRVIYPQ-=}Z60amaeZ^Cl>Dj?j z>z(|f#j{^m`uhpCKdlN^q_b5`Q&P1)E3>2XKN|H1#!!KF_kWTYs5u-=X4W_Dm5}Q#cl90<{cc?dH_SXZk#3l)rdBbm)e}7x&iGPuj zZOM24gl7G`8l+lQF@4^!*my1{X%QHKL-g`NTr7W`cyw%J#_4p){xh)H-Jq&if*&6r zzv#H{CE?&4iLoLS#v@ap27Qb#nY^G2M`-xHpimKWAprV#)?x3kKbCgz$K~Ijt=8L< z)|oSqR2Q+o%fFSD+g}5EaF;ZO>hkjPRl4tQ5wkO)VkNPo&E7rC-$kgtFiZ$3e^m)S zY_zsd=-LzFXmA1bH8iBZ$H$=D~7?BhP?K{Vh zpx}&4Ttm0#yMIrU{O>QvYu{X)l&LSEm=>jmRIoy$N0s84XxzeA$;*tmczJn`;S{8$ z$3CKA^_(wI$jgZ`vN(t;NUlT8)M>RmE5&eSw6(Pbr+)Mrj;FX>E*Op=5d(Xc3i8GC z^e|&0WXd~z^D>EjX?4FgQI8?yy^B`?1^>82FjNOVlH4#Z;PM}dkt{RZ1kYi0jJ32@ z<;-lZ1!VW0#xqbK0{v>(y|bY%`=EPOGCA#kxi;CXC8JkW|GnlCYdv^eAzOL70|DMn5tr`#Z zp|FFaBbVWjdzp=u{|X|AWE&S3_trpM4kGiR4uK7OwBYKx=T+8?%7$bcO> zbyv2$j*d=1YHF%bUqXVHLl0`)hz2RoWA)GHyBbCNY7viizTJn*9Y*K#^`DHFnvp1X zjLu%Rz^=&p=DOag*X_?3Hmx9$RZNk^@56`n{C(QbOj913OTx+L-`^&F-yMnmps?Yz zc#u;=sHsB0IeL6~eYCh490`8DPzVxiGJ`g^5Di1J6Yy3z&|S#pDjkZ&!pjEMQBwHau%-tg+NOe4bA9uvQV*8uuWbF+%-0QKCcH+uz&+3Ci;Jr!0Om`&KOUYJifi3x!a!h?IMP zy_WC(uVK+d?{O+mNQ;MnAbGp;h7ES6nJyHv!zb~lK&4c%P;We)O{k}*$D#RqPrdnU z^4W4-v4pCsYNBj0?J|*?+fZ%cH@k_6PIl{+0yeX$&10bDdtM5Ff*o^Jr<0zZ&pZG2 z()MnZgb4B?Y}KnNqS>5-wo)$~WV5*Eg}`F0d8kN}V$^vd1~CQOo3ax9VF(Ass%2c$ zOsmzturaL5O|~1~=7fs=BKROnxH}7mHMuHt76WBDvE%2n4fZ|}dP2TKi|NBjRp%X; z+v)H*wFRnbVJ?EO#<9t?+nuEk?|7zgw1dd!rG^Eb+3WRoaW+g0BcNEluj(CyF^RIN zb=mHEzWyzp)jZzuVzWE=`TpFNkq6e^qxsvrSvQ5~GchDOxZU%9;Ia&imif)6 zTwK;7BO|F5TL_tS-fTeTFVzIe_8kRjV^Q;CqoX?uTc7-#%_tG0sO((dNW8%zp^Nos zRctc0Ow{r5@g6}?u)6yk`Y7HHmxIVA6Y0xJCQKttDdfX?XKTv}*1smC(^!oa-P|5D zyp>>LMk7fGSKg1ys|3gJvje@ zN%?vlXYjsgQEW6gF8)oTQQ_`~g=Gb!uOkZhC6)_ zmIY37yX7)s;&Y67g;oo5vRkk>=)@ zP|dAPR}fOCB!$mN1_{Py)c)=K@^C5o&%j_ptweqczmta85B+KsY-6#;tf=QWjX;KS&rb*6vmtmi}~L!8X6kbeQNe!=~TE(Km@%j*LB~t zSFJ$(Lt&_5*)O+?X@8c*>hM^-pN@W|vsq?us=;EA6U+g18g#5~yM9GE!2Oi1+xI2!vu9Fnx+ibq&nZuW8R& zYf0IF;6o*33hv=yiZ$+2DODbJcuP@wj+rek6BQQbnK>M$nthP@^xqnaPK6i_{8bml zo#v*7(N|ANNKpF5&%aTk2t9ATQg7ZR7rupI82Yst<_m898*tfUx=~0YP5dc#{2GiU zK)8-ebzBO4TTZ&$JtQjXBRzj3^ZIY9uU_nQVk?e58j_DO)?|h$uaH667o9jbOe~Tl z@nj|r2h;zke+WYaq42%l%!YNoJy{OVIqzS^<4PR%yfe)Gak%~cRP*Q7|770tl)6*m z`*t;?&E=ow?8nbAbj$VTN-4Bzm2)l)HPscr*Fy#*wZ4yGf*qTmUaa~JPOW+Pb;Xgl zApO@A2HR_^mWvj9I_&`^vwz~6 zK;OS^mT&Lq?7{g-W>nFfhCNt)uxhb>#;D$G{%Y0V{rcJAqc^Ci7ZDzQc&}44YMMHw zbj~8JvP2#WeUaoJplQ~tbpFMGyq&-6S6RpXTWT>pt$0}99<8ncYSzq#P_CY7nlWU*Uo5`p#uQR|%&`m*$BKG|@83f>T4tiSn1>kMdm3HLcnY)8qJ zR>r&SekshgS8fegp^4U#C$jl#^GA+iSf)(~>s6i*|1ZJh#I|pD4R4>ZHd-7xHrEZU z>mbsr@tkj#prVPfInMTw%x*h0L7}k4iEZcFwLh-+CwMelTU(mm(=L&eO2kyj5OW{J zLBG!%>c1CHe%h$Iid8Rz8PuB(A0x?da530bs&{5@SiqISmCCV{dMKtSj#WBeI_)kE zN>idA(UEzTTYPov(LPHM3*<0DLPgsJRn^tW|2CMOI|_w-(XE(0YIc2t`oxacdpu7x zEcTHF7sN~;!3xux(Yfj5u1pIR^Jh zy8+02;KzrZ7~QDwaLI!JFsSO2@pW;I_wlgLtt-X^YcPgWP#$!%&-K9)gT4c*WJX`N9uym((}!wSbPT{ge}fTAAD%;3 zzWQaSFG6W)LRQap-6FsVxS zx-#DpUp70a_|iX!eUL-3`cJCSVxGnjkXGkBH&a3%*@{Un5k@;-@BW^gt+w=rEWwN~ z#;;Or6kg#XO{TG!63$vQJOK>jI0~2TDkD!)Hm#@3VQ<9LVy;Aiumh>|U3^_`IHXr}yV;t%D&;!HJMWe?D)|x0lC1EiEm_ z23cp9CX+k~V^w$Nnd~hmK*z~eXt0=<^m%`Eh2jxj@i8mCQva>lJRO5zESu~db89^O ztEPTVPSiWb(J5)j$5>K;{Pr7-%>u4p-;pW&Rhgn*PNQ$dXgr&JHPki+9-gONZ}6A% z#Y*kJRXGCeVnF#Qghs@QY}m!wel#leqQ=P>K4AQEFoONqmLqq_FT9pKI*~Q%ezVe` zWn4Id%==>08|+qCF1N#K2$$`)9p;oIse(Nlmh;WEGFrJGHH5cW#GFnAORBReR2V-< zeYG=aqZLSK?i=vXYVd{alXwku|$Gnj`J7 z$5ijH-h4?NwV0Mcl;b0&m+O2M+p~QBiXXoy z(Q67nof?6WHe`|wfR0b5*9h3=Fkp!-=IG<|x`G zpzdEH5$j$j6-|wLI2_7Lbz>m0uM3zClX$kvSG}o&csv~u$R=*^I2s!1ySuxh9cC!n z79bTGmA9Efsg1+GP3K5%K}?~32opSZlk4t%1AT@6R$F$)kLSy34z}B4r>5kW^Mpcv z-JdS68O`i|9evcx>Ds$Rhq?S~+et7}rQ2z}ZQ?-e5ng^|&3{O?TIcGTgMdl%G64(8 zjC?HZC>JW8bv&=05TBeXAmo|42)Nu)yDn=ZY-yz_)~Q0q<8?U=7eT~oBei0t6}fNf ztMaZlV&MH}9k#(IwqwY!zRKgX%i*>GMLmP*nB&%n=lThv40SRM`J!Npt3O2h$0w-g zrmbZCa0wdv(jMx^Mu#bUq^{7);cfpa)lVg8T3*w4W{Q%Pt$9}w3@m#uEk%-g56MpY`GbZXG6`clP z42X96Ko8JQgl%B9nyWQ1Al4_pWi}j+B)!|V9zaPE19x9-?HSMVy<1Im`F-vP4-MrU zuQE#Y19!tYpbpnHoD9zo#J=$XA1wj_Fe#9cK*S3{9s^J}@bU)YXhkL?dyf=j(+Ljo|}vD433j?xs?sB9dVER_qR*2hPmBcvo<;cQRLaW{20{D zZwR@JxI#(cZXJC;*uhS_)iT+<3AF5QhtPIWId4$%1nuFLU zBuwv-V0@9D&btpVdGkyC>$IN`STH?APmc)xeO6E|6Tv9W)VdK!#=e7JfJuk8##m|_ z%pN&FgF(RUBu--Y^KxO^#t=>p)!7J`5;KzNG`6*|xiv3%3IWPcNdYf=oG8&pos4#A zGm_}$=^9~V^R>Zj`sxr^wKjdv?u8SJm`{%fInTiUuWe$<=A1x?y^Xh3#Ki*fDc z)|zqpfj=DpRzYKLkxWK58FX5!luX9N{>8cpv%~uLw%!FCSY$%CZ15vq5&a9I;``jL z7a0N~A}w0b1~7B$SE%A8r=+M|PGo)C0svTeb5e@h+f@>6()E>15GDpjAO0ctR4^{XEJ0`_Z=bmV?dhfwR9cFaZ93rIXXgEIod1kA}2^F||`~V#sy<(rrZ(vGg!W zq78G=R;$WqZzPGE$zri;!xuv9^SW{icg0K2&!59$wOFN#ywPB2ioL4$0j&M~1;Yl% z{IgJqQ|{wX(DiHAO{)-`4lyG%7jvf(LGslP#!!~+nCp|3hLa9Jl||k>lzbE(LMJ`@ z*({pR^(jT8ggCD6q(9!`4?MKmMj8Smwt$0@4M-8%F~^>OoA7BP2oa)qa41m$v_)Kr z4QV5LJG*LB8EUWaNimvnt2pIsIEo3mn!dlYZo^5GByZGIz;9PP+5cUzk5R5NDStI1 zFceoWfpD_+l?dMl9lyD6w}r;Bz`&MD6{teHdpqSn+HhYc)hO6=7D9Mr>0 zlW*E;Vtm}1CMo+Ye)fQ+KDi1Qs-Na-jTsZ5wuGcZUqJ<`8tM+)^snox;Vl9c4jOg) zlSZz`OGisU6ZTLkSMNR<=w`ECVST(DBvQ))py&XAm6Qn9p>C-IIalFo=f<}A$MxBL z9*7J&+S1u9w9L>6vVQDsg}gHbd2s0G=NPIPzD z-$XAq{~bAmN7h7Ol2Y{RD8RblYEqx6-5DoV{X%L%-`ab2pJM`1<|P7~k+l7o_xZH0 zGZt%1#ilUBzCOu_1O8+|cs5_Txe7k6nHGRQUtwQI~U$SuQK+T=t-%7)eMnF;G z)YR0NQ`<3=N;ANGu7+^>oPDyP4wmzM#Khx&oeL7INcJ34l!zpZB<&zWZrqIQr(^`+G0<9PC|Ff*DpxQ#{B?iPbnk4 zJe)v9l*+c2;l5X>cGvTTW(F(1k5Y!IMX(4-(eXq+T09}e=l9SYtc(g)J!TNAeKH-% z7~+tE9gJX}_tr08xy0fBT!df{4)^Uq6rRkGMU#P~Hba z=lkpVsSD(vtX%5FG>UhF+;^U&T|lyWtrkk97(IpRK3?s$I9|C#Uc^q#&Dj&G81pTi zl>TTsv(?wvU#(?{6wmbyfeZb4_;5AKIN9lOYe7zQilybik#?#iQtky8hr5$I@< z@nTu%L4V5+grLJlYN9GYQ6(M^5M%p!1!+}Rlsv@lYrRY2aD>j#_rgy% z#J5!uo?f^c$xebWm6_jqs1118cVXBwM0{SH(O|>bwmWuahuxtsQpt4g zK^~huZpmGd`sSe!VJWA$FYW5zzX1)3#baVWFaJ+8{x^@T?)u2jJLSn=~ zW80>iR$7QoT);z*`a@4O(%-3nnO{q{2pdBOPCAs4sU^{~+<`k9U1Ih|yj>B}c{zah zBW?q{;Oo^>%QB->4m;aaU*Ii7(a_HOtJjGbWS_UEjOnE-K+>Jrw`aLuzKmEnGleUY z&GSf$Pc)c6h=H6JhwDw6$jR3zNuwQvR-B}+*oi86oEk0ePwU zH0(!XZ&?yx1f6F<-(qBapPoKHPo+&^{nBT#TsyDfjh+z}8oJ3N zbQvL^YXL>g@Cl0i|kbO#1LG09hks>r{{ZusG{hO+r zyjQg@v&YTh*Pl-}%R(8!Z%E@mAoVX1g-GzV&snsXWHh^u93^%Mfd!1WJTj9F8GY1W zWLzm217ruCQ#R3!0Pi*a?sd-SGM=KY`2!Ui*qRJ4_p@&PUt!~$quFQ2vj`$|e0;A* zfGWzF0eq4D=Teui)#i79aPjoZ_0o`TP4j^-XT^5TL+iHl zbdL3nkH_P*ziv@LWioyTAaJ9j?x#?0XA3IVWAHoPe#UHEPcX;1ewq}mA5fYijNwZ+ z;Px!ch?2O1LXuiXZ7o>LC=KPgd9h1B9``sV#Z$=pjmGDT_e)S$$|)0@66hYQe_)p+ zQH%X)$Vbp(wFrp&%i8*_-Vobejv74Ef)rgp9P>Y}=j5| zi;F*2b8R6n3yex@Z@`V1hXH|oVc}1U3i@Vg^>dZ-MJs-g6G(P?5`TC5J)Q95nD1%n ziw!aF0exLjtz=>Cvki0aZ5PnT=hLjbu~F5d{K%IbT->hS?717W zczD>Jf~7z@!vmwftBrGpflRzqeg+G-7qJ_i=0*kUKbxl0`fuaI*;?y4xxPxvcRaT) z@UPxQtT0q8@~`e>p;)3ko71g)KcB2N13t$e9v=4Iph}I6oFYCR&=pK5^draAgtWSz zr|(}FaDG|wFH}UGL=p6%7$6P)9bi1nPoa-c-9XN0eT9-{O6}_=989A^$k39H8_IL1 zz7|lyTG4WkfsF}mf*6M@Qlnpw8PMHzoRR@hgVB1;3M)VPr!2KEC$3DV-@zIn=^xifZp+hF=TO&9zz_!Mw^oBkObZo9Z2!?r2?+DkK5DD5rdX5&ZoB#ktjpJu&MLBKg${d3?)GB zK|5ed;5i?;7N|oE@q~Qm`d4y%FE5IQwfY9Ky+cym9xk>@I5xblcgoAl7n{GsOGf5( znr3ocxFJ(gca0|i1`gqFz~}T3V5D7Tkx3_*jnEnHsz%mq(%MUd6yzO6KcYY|)ipIW z=UUG@zL7#83J$V;zW1Ow`3ocfZoc?1^V5b=>t=g??NC%%VKFkSwhmAph=1xgfw}^T zyK?eeAlXB>T~1R?fIh`ou3lq++bIEG7^zQe1isJZXaSJGRMmHM#B36HQ%(kIl0+Te5K(L;in$y0&ur*_8|#dnz0LlA9uB#^;8}fa~km zuP$$G?vpt@&c`Bww4o4cgxxZ%C$tYdLh<5-I-G7#TMTr!f_d{d{DZbL=YCiuN|nmnOhBa^;9uE&C#n7DToERlX$%EqXM7j%30o zk9yni`-C8m?l1L5L2J0HR7>~-@*!>ypd_Z zXD-*{#>Vkx0$V~>Naq4$4kHQq4-i-oW(SOqUmlbsY%)!2NUYClkV&HRt1@w|*`78N z8OiFQ_WQFE&IAd{^KLs~kXF&O8S|g^sax;?jv^U2bU?mTIpL2jF|^9p=i9kqTra!t zj$UFgIt2}3#7}~I+-3@D71Sjw{J{=H@CS# zB0(SdCoUvyY(`hh7CDh0brkeVUCko)5O_4a<-L!nRNUM+?@8S}sp;r$JQi`0Vp;a6 zD<^!?t3ql2__0>7;$(2xoJBiR3_1^lJ0wp;6=Ou42X!N-)Jp<&f_qrz%!w zLoMKs&&k=ixFPCPqoD9d{)(rIqq`3mjVlyvii#)f7RVHMeK4u}0$`)l%>XWO9a@ke znK-OMdIH$-RlK@Zum#aWBo{44>JJgsXDSEP!h%IS48VTG;~?dodH{kNu!N%{(TOKf z6dZB|BOfqDQYQ_<;#Ned!&~gs1W!Kl$z0>m7?Y6XxpZ=9sFm-m7?t=Qy7;P*GT5(2 zb9NXkhjZ%o|C%~4YHQ|aP^2TNpNQC+$%1<}pDmJkT4}H>uD4otAn5!9C1P(6!Q&^{ z+zm?@=gWFHVtTXe%AL2cB&VdbXciP7Nthe$05=N!E;>TTHc;p{ZicmxCfP-=zM*=A@S3d-&J1Axkq#RzNKtT*D{yv6)zpZsu~QQ#d+@ zY0FwM^z}(()Aek%l*4c!Qb*lHrUe!rL6M7D;j>y(U{cZ+uGtELNnU*a;9#-${hF)c z@my)b2()HyA4?!=HC5fF7~I|oF)AV=qJ$>c2v$)kKO_p=n5?^(BAvl*J>&No_SYfi z{NFrRoGc2dSf?7#o{b+(OLb?Bq=cn5G#1We?l1x~qY{3Q~BO;Wa zT9erD@Q_=4aH#&+#7JdEW;$~w;spzUTkpC7;*KjH6YCy~{7Zhx7IePe{$mEZ-wv6& z7s#S*Gy-j}14ug59?f?mI9(ZhBt$j&nrH3eSc`edGS(cNquh5 zhubM4KL}ush9vjN?PH*!SuNHL198>emHAaT{k875R|3t+@p)H$Z>cZ?hD^axB2Ctw zZv#~wPsfb_N4gjNJSERq918IO01I34SV|er}eQmNvpj`U((TFeyZURGIDd zDE`fe)foak%)YE7hUNWE1#zDJ-th2nX`9|wPm5Rer2l=J6A1{5k z<@fn=`jxqyBVsI;TA-?D%=>^H<{VijhwXZqSL_;fsLEO(Y|16Hj9}>-G4JJVNuZjn zeRG`FYl(3&v`jPqmiLymz&g^CB1szuIG|=Ql8|K=kgVE|^L)KP7Q@0h^gOW^7|{ra zOxt=Fv1f8l&;>PE;+aJ|Mll7L^R+}eMvb}Aa>)+#BC3htMq%>B3-A-x;x$Q9coz$$ z96Hr>We!O7VL5D9OH`#tzUb-XU4Kt2bOu}nEFAhDjXWnMV&ZZ6JC{o}hE>v;oR|MH zIBbKrtQB6k(YL*JqtZ|;c7NsnEy6T!pm`B3Rn7zL6iJu1+KY&Y&?ur}-f$+L>1iIb zHYsNjytac2)IyL5X0YfrSx>^Kva2~`XYn|55@z3`1ghjDmUGa)>?mR-&qzfoQ?<~m zOZs4VwL9 z6A)+TR04bKasIbTt4gUszdzPE+55G6`=BhZ@?+kHcv@pC`p+F#JgSbBrgLSw6k||- z!j}i`#$?!Id=11%VqdYPKh7sCET$9qL(rTbC%}rKc^?1CbCt&`e3~T52swxGU4&T0 zJUW%{7?Q?uih0zeIhxR4d3JZ{kVM2KByhL^L)O24@EjNs@>`>ZL3GaH)yeKdUU!y7YOs@^i5895q;{T8IiCDPVx ziAk&aB2#j4aZxA#cyVQ#3mDM&d4m1{jIhwqYe06Tk}VjeufLxja1k=)I;wDjQP!=1 zXxA<aIgh%&_iP842uN%w^Xxpwx>()1KAmhDl7_@31j7}?mkn_g(*1VM(3Lr9&xy+D@O&(O9fr6yL>J!p=V6a5 zqAp@f)k4h#-k}Qq=N-TG1R*|Om1TujzqBR+S!jU<888;&=6NR!}xg4eftzV8FEg^q<5 z$AbpI&DQpQeR4kAldSy2G9jJHr0)z^cnd%Xt=M)NPmzfjjAwo{QN!qaU8+#_M6b&f zNG11nH6zDQ!4^yc1w-_ceIw)hC^0)byYubwyponVoZldi&=m0A*M9gsY;nDnN!i`5 zaoaz^cZ!LMnsC^xJ}3giG!pvfM|z07Yzw^If1C{f4Vu6 znHgu^E%~y>QooBsHW>%gL&6Fe?HMAPar_P+8T#@EVwDdy^2MTX2YsC3&)9<%yM%hR znH-E;@fJH?OaIGj!T=EqVNmKkBi@Wf1DnhFGK=2ucK_tT!9j?Tv9ZW6ci9+QXFiV_ z2$Y309G4nT3rR{)aPY{w+s>^F@IE}2>lZvV+CBQ~IW{~j{j(pcO2!;kP%UxeOxXNv zK>ASj`eG+V|3ONLQBy;pS|(qVX@`qNZ5_=PxANTXCVYFWV(3;HKncP<(SzG+A#o(( z-6&fx*Qm1!kgbw0NrWjBG&h?q8c%CpTH2B0Fn1{Tdc3gYI&j3T(5d_5xiEeD2PR84 zj|B*(MH58yY3qkvI2eW=`#S;NSb|Eqyh8Hp_4ESJ>vpnZuT+8dyGRJ^1RutK@+S4s z>Oy0QvSde6(9%FFKisW)S{M~{#uDTfp4F{?LO|d>#szf!O#{0KW>BMsd zJafww$`lEfE~iS1x)u0Myxi{T+G2g7{eV}fsB*RIAFuV*`9;rMGxfBRmDDg)!m98h z5Wg&M9b;(ruAI$!`04F@aYfgH970QDKSo) z)#i$9rogU>24^6bqQv+9a?MA*$%JvUNB~$s7^lL!qKHtx-bbYJ>oIf#}s7QbpY+I$%-m$6HLeMn7^j~lV zSdz|kl~+s{&6c}n=n?t^;-)mZpY2681Pc8Gq@|_3Dgp6sDVNDPow3nJRxYJ}zrO13 zs3Wv0pVs{uCQcufH3pLV2OuPa05-YiB05D~H2@uqrLp+@cYpy2`Q|3-J4+!4QKn zz^CJ7mKZqVmvdcizHBwik1il${SPYs|*m9{|z4mtm)MBHI zuMNCM#~`}?>{hj0+GG(CvEt6F5n8cj4oetPq2u=IGO*zfB7V zt7f?T_ncD7uNErbZyL@V@X_v0SLTf#zLBxVt1vh_sg-L%=vm0>2$Su-91U4<_64pv zD}ROrthG9S+ovZa>8IEM0V;s+6@XC@Jr_89Lv+dO6qc6u=(-y8^GIS5b@61_Z!wFiO)?Jyit55&PKexFW3LWlHSdR^495w^XI9S;T&qElt@v@@wW zXc;F9C`dj+BqXqP#3?XP-l<)E``?F&k3Dathu)>Jw!7G?E^hwtaz_scOo7dor%}Nu zoNN?;hyH#9NN!J{XEG+6lv9}Xvr+~7kR=@mCPA~2GBGfPruJ9?l3|?bBV1FDMC?w9 zTsDse_F%54gxQc8k8g_|Wc!~Tl3>0V1$*|Y8K}cFnr|VjL&}ydT%1LV)eaS??Q+Q+ zt8DBcv+2WSfL-!5&aqKH$SLSGsPY9TD3T5l$jQ=&-XD}?*uv{tQGP=g%NULxh{lCS zxZ-DEINPkt%K3KL-Fz>0)s5sQusaAkOpmw}AeGWwTsMaRmrSTvHr1)2&lJ2?%gL zfPSwy-}Kz5|9s{Y~edr>>3!hUzpQdBBIwbRwmyG!)(cx|n0c z>`$tc>5pr*Q>)N>#GMeTU^88sMj~F1n?K8SrkxVW?N3ogNQ*|RfWqk0HdON@0R?09 zAcPyck(wWc5-91#W>aN3Vgjo%-S~{dw%*0}z9sPj96p26{JdkyR?Q3Vn{D??3-FW_ zKz>C$PdPPMYB|kj;=34+;L;~f(Y03B4YABZNOyO)+T*-VRQrJZ<+|s2g^aAM4O9#Y z6>=;JD7c~d7lEJT(AwTS>Ukg-HgU3Ve4{Co$N0*@8@-d9r|m^Ma_gG)2FVTR`bZ4g zf1DJ95HP&~i=USrPe^)X1;`e@0;tM|o}NA`!|9`ZfEn#-L6`B60+X($hCeN1Vujwm zg&IQAU)1IB{38+=2oqJfa)(k_4TZO8yWcfh3_cfHgX>1e!2gF3Hx&4Oy%2cE^=fJU z4R_<;ejo1eh4u)!#kcriFE`>?KUUBk=j)}ZUFE8M%&Wy80(-B-v96IB|Gy0xYr zQyX|fNHw+SgOqwJ!&C47CIDZ8f%RD#6YgP`o%>(#4j2P?#5JQM9*A>#EDW0+7b{bk zqf@h4k_|z5Gpy0E-4`y+`>gA)f^T`22b+GMzW{h@K2-M~HZ=x<-12v%0*HGWgU%-Z^8Kelcg5TqwpO?e(9^rS0wt zp5c-aJcjh+fNTSCKYR#)6I!a)?;Qo`foBq;5hrvc#mjz%t!OOk-BV4Vv3>$N(^Z!1 zx@VC_U8X9%rU|vOq(*G|Ee>-VHu!s_%O($LZDh&=pjvCGI@7nH3=9lr0HyF`51j2o zBHFE6S|!!2HM<;D`K?ONgmRyHjRHMY#uTviRquva6U+V@3iKkd zJ}zb{fSX^rGjaJ_rL1VK;s-~X?%wrM5+Auhc%#+&Do2OmpbI; zHS75T?qEElX(@-Mf=VEL`*i@FN_O?3%IDRRi0Bl1>1U06+5BRo&Du&xHKH24XSS3H zQzm>QF~bB?pt{X}kTitI>p{rKXx9VSMnN9Il`i73MOB?|9> zY5<~oa57)!TH$`Zk8{WRqy0-}fV`jJFju~%AhGU8`w)_@9anYHLINheu5LnJ&#_$& zc+Si$pF=Wo>}1E+S0_zYQzx>ag>sGAc0f^b-TW4&#R~Gq4tA&Ac11wkVZijc^pG3X z9D7A-moCbtM5hQ=sLyI5rgLNpG#ltf=>TE^r2(+WcOyr8+TFluoSp}f1k*9Rp7@s? z3K%Z>M6lb+%_kxJY{xeqI*H?#e+Gao_zbAX^uWk2L_Oao)!bU9*ayLVgqTi6MSWqP zb)*q&YuufG5nw6Kvv&I*fZsG8^C^7r zHjs4-1vnb3be`~+UsZnF=$w=n0)YpUYp9gbDO3l8w1{~meaxV(p*S*EScBPAw>~Xx ztPVsGSt?ZO&mN08lAxJJKni+_uX&?8XCmK8WtxBlgoXQ#0Jag+{at#@TKt^ZlWb`B zyU1aMlzZ1c4NRA9Q0G1QkdQvn7C@1j?oSra-yh~bLYLaEwkl@6G8_Pcw4etiI51Fa zJKzPY_(vJAd`zmeTkp_U^+olA1PYP{@o3^Rl?j6+`&C@Xco2S7U)Y@~g}kA*eT;e# z6%ERKrW$Cw9&J)iU}D~>gPx6^Wg`2vY!+zX@AI$y*!N#C z1a@61C#6zdvqz-d{5csSiGThe01os)3>)u{W)B>_>wI^BQ@rJ2tO98}`$yz^*o3^m z-hjHTjhp!I#45ABjVP2q$!>1Y$}%^QHu*4P#1|#b{QHUl~Y18i)r{$-^evX`fRSJau`Q)AD-N?3sDUSQP}La z`!Kib<41@RZS(q0%-7LkF1mmuR)#pisBU2aL)3=;WdP64x%#x6~B1ArIzu535hUmPDD2o$#@ zE8_)Exw602;PcOVFldtm=oFdH)8t!gtup>#w?Oc$nsSi^>m*vRD- z*@vU)@2F_Lz>8hIdi4~k_y(fhefQl2XXFR!>hHh*-c{-t{w~QtXY2d#zaNjni^N8T z(E`m?FbM-Fl(6DK;`QRplv2sD1eLjO-@e=L+__UnD!xRDHEY&PK@%EHXKGBa52^io zlW1JUaVB11Njkt7!Uzr-GHmn3OP8V~&R;CW>#x851NK9_dv1S77t_8<=MRmCvqa;M z<6-|eKFZR@jTN^<3ck||7e5$o}ZZmp!Gq~TKg4+KdDxM>)#I2JWfu1pWoR4>^tj*Wace*E}L zQgIAq!AnN^h|`Zs${0fU4Fp3N=zXt}fvytXvrL=JxWaQ4OJv-{5>_-wyk08fg~D<0 zI~)t2sL=QB-K!xLzp{kiBqSt!DvS#xbxco6Yz(Of2i?AXyPnj!S*tT=&cwr8#=hwB z#OWcHFmCopxDQlV-0YHyTcGRk;lr;B^F=HvLkQtFxCw=_Ew$f3l4PJO_1R~iErmfu zMMXg`F=`|#!cQt;#S(5$^+2YRZG_)(9Ar3*;#+UMm8jKft4hVo8r`^Yqp>(X!ee=Z zgoY487`>B!_wLjVYD2E_$Rw@_hs+{TfnW@U5*xp= zDjUAj4CgW*L#s&h5EW_cxN&O@27^i}KGy2erAv5WeyL-N<0qDIdnpOyPtT#O^_HYm zqL1^wf`dZHesDpM8!uhDKWxSkrKBDhtci-^x(&n<$6qX|I8#wW2+?h^&-nQG6gGUu zvZha;z8Go7buExOpd=S7l>d_Yz~>PSL%sOsn{TE`?JE!^8R!npp7Z5=WQMSUa2X|% zCF4SWONC5U1>bKP;uv4fnKNH14z|h{jbXWvaz>S0#ihlC3m2lL&dVNsvts2mjmBgU zO~*AEFiR50S16$|#D4Kf$AL=4fvL3M9nyZ?kg!m$E>KbDfLkW(V`5_ueY=WzghELM zx=WvZ{`pcED_WtrPmwGc7jcE6S3q3-IzRvX{)F|;>kAPYLwY=M{KOK*RZ7CRLzdQC zlGqr+GgQi)>lspgIcM&CIUk@87$3?>kb*W|G#aQ>S|L>CQs?D}mM&d7qhrU8 zDff?KN+0?umUNsS>?4FBWFDb?Lf`iPO`ZA`>HwD~e0NbyT->13s~g@OU0&rOOAi-jc+|P#qLP!f^vdVZ5i+R+Bm}XX%G6KX8v; z(hCzjJ$za^ALB}o}W`Ydu$QBh^7^Kzy#Gc#WkdWdKO z3(tp!i_Z-np{w(ciK#3}26|c_5QB&cC9FKOUx|vYgYbA=ZfIHa4ryM} zWyB3nO)x{q+%3sKZ<5#`qO%>92(!sZZSQKKVhfG3 z*jTw*QU$ojR%9N;mV;OE2x*ym@mMEEB^F50(pe7M$rKn0q4~F^K$34h&96Na&>1YSkZp_~FW@o_gv>eieh^aeTrU0)xR2l9!iPpBTeae2_;N zJb3Wt<;$1%#xn76xGRb?@n>VmV0beM{?qFrhDYmZLkJ#z_~8P4!YB#frNUq^d>fxJ zhTcNCa^<)eY5R&tP0?sH8T=xH!H|F%LmR1Gr_NQGprs0 zxm;yxE6cIWn#c@>IhZlDk!sbdzB<*s>RKAEH=5u2jm@!NJZQ5XoTp z8O#{kh)Si(uUV_+E&i+50|RZ;8)Wq!t7r0P84QLb%oy6iJMWDAA*Nky8vj)UlGOTS z#o%fxG3Ewd+XB2pjiyybTx{$KK8V3!NT8fNc{0Dqu$H=y*8Ta?r3)>C)xr5^&YXFS z59Xm3O!_8^*uG)Z3xyj2!!J`qNWTlWE1UZpMn$zwf2vWVKQ&RU|9WrC=+S&IgTZ(J bne6`ok+|m^eJ{$T00000NkvXXu0mjfFPUJ@ literal 0 HcmV?d00001 diff --git a/core/gui/src/assets/operator_images/StateToData.png b/core/gui/src/assets/operator_images/StateToData.png new file mode 100644 index 0000000000000000000000000000000000000000..6fb07c4c71e807ac414135805d0097b17a3399b0 GIT binary patch literal 24997 zcmX6^WmH?s7RDvGySqby;_gt~iv*`Yix-#TUZA+UI~2Df!JQU&cXxf~uJ;2;RS9q|OyPmY$d2;*E>KYDegFMH4?2`uKtY)bDo9Icc^IE&A^PCTJP&fXd($P+ z%RpcPbR zby4k+Q@+0-MhL46+AEg0cz9`byPIq(CL~#dr_<&YdDp?{4K6hh>7Ck06f6I+n&LITG9_3z*N3C0j#KZ)+ zmdst6B`TKagq#~$+k>$sdwY8yLo$rd*-|<#k{Mo%Gd1pU@xXy69l>zt0pgJyalI)N z`)|%W-LMiPnkzV)DJh#ZT|00b;TYtIbR*!VB-rj2Bzl^}ya`Gq!SY&H<&2NR#vP8+ z+q%Y1yZ(?_ULJNrT)g#ySkX3%@pN7yL=5t;w1P?biDno7#9RXfA(-;m`&hEIuYEh6zPj?lSoPS8rqnWxMUL6%wMQ7!+;e&d*OALN{ zMA{1P6wmX8rStfx?zSj~L8EInvv01b5{=PM^7HaKCnDNOOA zsc`D)X+xAQ;9{^=m3@D6S6*y)uPJ`vVWxWu3=Hh&G*A$&El%$j`yICn^WaGb!xsS; zc>gVgc{6{f1bq3R7$|)~0(KC8sQcvtIqr-Bmr0}7yia+Xy)(U%y&XC}ZS({&Q1CnE zQV5misYgr+Wui-r-Kz(&h{Y@flxZr6auT8n`qW8{ua%`=&6a*Whyq28L3-|T?WD=> z282fN5^zx+i1{5i{M}BMc=cRoroz2%+}fou?|FNJS(xY|_zc+z=G>4c3$Y4(2cK>{IBvJX|3+av|yI()J&S7A>I7tf`KL9df80CKapplKh#_=wFte;F>Dx~^FZ5Uh?Ef`*yy1m!|-Bs`XjsI^q3TSQyRL8ykU}I`|k!emlF1B zQtyBzVM0=+iR;bw;*G5buBUx5*;C|Rpc82wz9wFxbZb z)2;D)eM&|^BmPRjF+CNogf07%wAFd8Qoplx1{ohW)cd^CeLR)RJQ*Jkk1ipE5*Htz z9uF6n)||MoJNDIR^|J+cj(|5k#J&Ofi^g)MN{?wX$Sq9DQSWqYW~J4|n&cs*MyB>Q zfC;X~1Mla3mxf16s}QgXiQJ5>1KZx;7(HI+6LLEIH(p+EAZi>??xJEOQ5vc+6JZ+c z5>K+>*~=Afou>DpIVfavC_8&X%P9V5^&3Fk1b zU;4?A3uI%SPCD<`jt%lj1L5GmUF{50sHT0eEIlYBZYvo+bz|@nCdXmw!d(ItisKmB zr1(REx{msulVqBcl^FZpFJy=9bVW5t*3Ccy~RqN4HGs)}zp^ zu$exAw?ik{vvfcjJ6h&0xcX$4qU7MR1kZP;Kqk@Yc#)CNz{khk;3v8$)@}*{4r5^%rs(u9*vGx70U8Mw2+-`Jo>vx; z4-FrnQKFT`HZNP?X?VXYWF^US-esb!A7G!f8wDD;(rxe|Q;0E!`Oe7Bkyj7mJ9vZr zUvJm!=Bvp#vh(w>_zsw)yw|Bq;UZOOo^bJTbzItgt%g&Yz3{k*Q3VT3ES}X23)g(! zV5n3b9_p+ov&ZVp2Ssb0cSj=f8$x9bbDNIk3Id&4R|7L}lNg{^qxb1BYZoff+b6!7 zDWxRB*eWY)%Z^(&h3ww<A#b|fJo>6K((m%>@Y5x_`0tsbS)t!-ZI5De{NEnC4Ss)#4_dd>_W-VsC%x<7`uV7$ z<^#FMMnKXQ74UuD3w!%XeGJfye9Jwr_e?ohC&F4cW~CUrSKHme z0v1DYy#F<}RO+u$F^M?}EC0e_*qcX7k8UfF{88*0mVIX`B!Uns=BtgKYmRHqYhf3m zN21W&^AwetN&O6wGY7#Q7S`6*CrgdTpz*8OaG72WAE23z=I+_&KX9{Cj?xZqI{u}F zcjRi)QnTYXVxK$5!)RG0r}{H*6L}FZ`Unq?-$auKRAd9O_m>9$?`N*FI*h!2_WT<- ze3j!JF8dQKgFnfh%ML2@uyAOjN%+nZ(zvbHnBLHd|DM%SgQ6nY>r%(%WcE}4s3@NL zzdwUB*8QGO%YhwG`2O~M&a0X&oEABZQ>jyNG48(J)OQt`s*ljnVd%a3eOcsz{G-64 zlftUfq@m&dR)bNS>*5+U_jN|OcEuS%XAH6*zelY@Lb0-_xh6!3aTK)eIQs1&91A>_ z&07(Ba3UokVH!j((#G*}x9p%))kA#`wuK+r+}|QS*;$Pd^m z7}LK}wyKp(sXzaS0sH|v|0bQTz`9gb$yyS4A{S5cxioJ z{pZm6tJ|>Dxdm?B#M;8*BxxYw#c#$u0PfylIv=tIoDRPNS5;-6h^7O2QBkM(8M&jk zU;u=tSY}F@)y<%-C{yvj3<+PM)J{keow4)w- zKwJ6D6hie`68{9sbAjsWW}2h}gb2fz8RT)W)^LoUGMZGp)2tPWPSyk(i3Kxs61Z!! zq%Pz-tap(los~q;PbbFVQos@Ea}Y+pi+cUe`ZYL>Su(4ShHt#w^v&HF9?~nbnLse{ zAJF`R!T5K17{*WvpOPsSw}>;TS^ScU_3C1HHjG7-O2Q=2*HE`8v}obbRZI8Efc<`j z`-}4R-nbI0*x_r7L2ck~3Buf$A?o?W9C|$Flig4su5%L!27aa=AkT_+^TD4~S%Tl6 zx(6xzUwXbRH#PP`88Pdf5aNBhbBqrLv3r}hnn5Bb3SW%LUN2HTyI-!9FfJX(g7~4? zQ%DZ+wbI%2IBO7jz3}M(J~-Eqq9Mo;p=9UuMXM`qAC!@+}$} zgWbzP3IiVJoguj?WQ>sDI`hB%c#{`?MQY?5qKq9n7215>cZj|eJ_mUWbMN=v7*S#7 z#Kue>!i5i!Q9;JmA*@HFBh;GodU#H+p&nxJd-zWZ43u8Oa)~O4of2;V1jZ=RLuI&JD-al zM2?}nrIO$!A_p{Oo zSBY_KqJKAjR92K$&fNLGH}A#2x!?P{t#_@UPM7rtG`6~*uZt9DW(-5xZG&hobfs}6^>2CB5Lb=#2Ght$ogJEdeqShXge!z%im_{ zmD6uLTSj8jN57&}5c&G0&c-e{G%wrL!4ebihOh0q9di}bWZhcUQJk& zn*wK;L^tynfy$bHQknx2O-T-t)B|=1V_)$M;QN1AP)zM4a4PZ`kIq& z@6rC!bFygU?Cd;+q01iEcr5M_B&+3BnSVu3#9X0~Q&qNk)_(4{*Seo=u+sMZ_-iso zFVy8g4kD-)XaVbL+2y^YJUk;27p$UzAM8B@z|bk9`lr)!6rP^j1Init%U@7!^y5ll z=`LRH*PA^rAp~JdUz?O<_QNH0tRNZf(&uZPU{Wr#{()lUjL@%-m%_BW`#JuhSIfmr zVlh{=!Vo!pJjYJ4cA1h!Ow6h9&c`L?&i&FfxXf^EYn_Q zC^f}Rd`STcU!MCAj&8$Q)&NO_cXi6z-SIpzu|%puP%y2tx`iu5Ka0 z5x)2X`+f!#+G@xB8!6GPOGI>1{;5*!3Z3`Y)1i3w|9W=+6bUMVDx)HN8gAOZ&F~e2 zRe%VC@Aons+yg+2EIUV7Vn39ps=cU$Zd*OOAz;A{K5>t3lz6ua~jE0F{ z2?>i_nPKR6juK)NPj8sUdTa*G_TO#?NF1VZG325*+VLGfA8JN*8u?-EMlfU(2IR57 zv5AqB?>a&klBj|UkrUU+UY|PK9M(nuH6Yr2d5{u7L_o3pD}R;4l)!K{yqLiBW^r+8 zZ0i)^;Fot{==R}PDW@rOxl^ogy~*`}vQ*Qz<+&8xSr)hUEM8R_Xizn7+n_dsN+u&O zL{sKZ9|D=`h``osFtE$LUx0-&B8{-(*igj8Nndkc_f1IQ0E_AaENVJ+&2WcO3$wluWqVu=d(P~m>i#4Zm=Q?5nK|a-3Q)Vs zfP_itX6hL+vNMi3N@Get+RBZuGhuX4q$0)x=T$ ztLTAGvB~sv%)qF=>*;L0n*&yvOM^7u?#o8c0;RQh)oQ@~+A}2d-M-~K8yXhed5|Ba z$oh*}yZl9dt{(;1gNNIvOU>^(u0N4pEC+!jlfJXN%Zr`GLK+lso0cpUG_Pc3wU?j7 zqAQ9&!7m1)=sLjk_iU#Dj^GEztk3SJOW!O011KN7#uQo4!h-)pd z=4n3mp@=1R6m`I6Q0ocZa%stI%K(tZ%T^ReC+~IRd`@eZQF|<$1plDQFL51xiS;hu z64i7b8~sTG2P1~K-au7nbrk=fM(u7^%KneL3i&PzaXGB~GfPnsgStDl{NKHs15H>l zvqnQ$YC4=(`3#y+4`!;Cn*~Ld<`B+bwSTZ9p`S-S9G2wFygwb+ls}9Fv4T&F!eob} z`#+3bxQlsC$OqVd>owuy!$4;JcX`C?Uld=|HH=%eF5ds&|WL;#uM?Es5$?Rm_*UCm~SQzj!DeOjTo|_>Adu{;LDLBBzMnZh2 zmQbCWI1^daKks-xE7;nsrBq@pw8`}FY8~>*AE9HGGMmxR^nck&fCPE1E@?(jUXO4X z-kq&xT<#>OqI=!`)737j3q>e(jB*(MMfck5u)YRLCI>O3h!0Il_7r;p1;K?^^ek1W6v1$suL=@Sj_- z_))u)yE#5H_hlvYlcPgJV?_&05jiF^t*mNwCV@e%F8gjiPlqL9_*ooZ7*)nR_a};S z_&w_Z$ zTDnDi^HTb3wq$atKrXg7U!y~{RB}#xDIAL?xzTR^Ovu6XU!67I%{#si27YG4a4(IUH0yhfdMm%LITW*Il{01K#3Ii z9J+1RmPxZEs(jzp^ZF)am5JYf@c-HQGi2xU%S3)OneE;TTfgX-!(v)9Sp7u{72sB$ z6gU6E#k2$Xd<&tzAH;g^XSo;u_cg*i7|k}=E-LjK&N{Y#3c0zStvr&ngb3ympA1iQ zNv@y~wfW1FLI3c@6vs>cQM#{<^+7~PKw(te`U^Joc zM>m}ZT4j&tp=jnyP2Zjm=c?8l1wA$pKN^BOaRT|a!VOT%mKv;85t3rhfX+cS!T#4o z>kGwtPY@ypjyo*%{DS;~T}`Lkaeo930oZw%Zdb6$PMFK%Vq;v*FRwx*t2(!I%JpV} z>*MyPQndke=5fU>MbfojKXmtkjUABov8a2iI=xih=cV31&$n4te4UOcvgS|XtcENb zvy!TUOx+9D@&~E?-)`s83Mjk|^1!h#$!AWc(ZA-&2A~qjP+%(CU*#LX1pkp;gl)Z%SzmU3=n2TZyhI$vXKrhW?Z6C`zW;Um=B zW5^S^3!|rdy(W*wdb_Pb9K;~xH1RGNm{0WC4bZRBZxH@;b#;ZE!ROGW`l4@>3UE{< zKqBchekqIOwVPYvv|njyhU5_NBo5x-&D=1tvUXU^7AuqG-2|%pI9IoywN(KcSmiof zy70vY>_d}2uZJvS!7JijNYR6&GDkE6-1j3xjickF$p2un@KjDBg{YI%J8tEn+Eozg z*k1H0Afe84hAH;eTaHR=j94BDotB4t7{pvx<~R=VdFX_d@Ey9|66;#8_$3XKK3k zRGVdRG+v1s`-^;o5E4Db^YdMmW^w%l+7h+Ly~J5b(}mu;;SclP)B7Jl1kx@U~jx2c7KdY53ww(@&fk-qXP z=)IOJ;u}+@Qs*5X{Vm6FQ8r>y^8>i-!Lnm<{eBfv2W`N~a&ygLGMfQstf`@tvNp*K z`6Mp;fz?u%uXlz*0)5QWY4bXJOPc+b&Sd~#QthP!&RRYsVn!03NXiI%%y3Q`oa)c?l*8zAN=)mu)r8*SZc=P$HD2g!8 z7SIE)X1h16cL44`Bk&J}-+~*7We#q-Uv*#q3kG;&Dd>d{5O~9x0wB%to_4MY_ z&vU#!--CmoQ-(?jiA!bw00dGfnq0&y@?Q6Glj$zC>-nsG0*664-RmRABnd1oLgg|` zEFQaJE@s>6=B+Y5E*`yU6i-5=u%9q8IsMucpe;PaDVJ1yS07(@^sSVbq*P0|$#1Z} zPJaXV3YpjKQRAptU7|3>Rk&C@Y*APsnq%fxSEWS+gD`t+MrZaQx5D#9p;I31kgJ9S z1>+VH$1g^QW>1f?)M0{|egxzOt8p*SouT;Hk#XyFa3YsT*T{5`z!p>Yc#hclqc~pFE_KQZTW#P zBRuq3vt$d7=4sJ#B%u7!vW%CU5;4ECG$tt--G4q!W)PfL`)=Rzhlb$#t@D@H7(w?s z&G{_9hrcYV?>O@#7;J}qJIZ3OyQ0eBf*4mM1E*GmU=6mF-C}Emc zR6Asw|G=}zL<*pE<)wB$zJ}DQ&|s#{sZVTH@Hip3V5(B+%bpq;|Mh&CsBDPj!PaI= zkwQbU*ai+S)yiJ=iPxBi@9XWnf3xFe@3cjtw8d&-DnexJM>>jGmraUhmv}!Y3{#5K zV>HM$0lUx7`4QV)pb+_%V&)YlJsk+gQAYbjDS)y8819@FVg^(^8i&J07ZE&4H;x1o zi$KTg77c82hhn&z4I--b^A#@XL(!g*7kHHHZ9fGWaxud=9VA40);|uJc9@(!a7J=$ zlIpW{zi&W4ryCI>!D$|o_mKfxL9G$pz7s(d`9cH*0(E+v`jNMR5fhgxxlotoR%0{2 z^uB(ekJpPFcdh2@SQ}=LPckq=tUDC8i1N#hf7Iz=NB`sy$(?7}OAN&T^X zoR)h4Ca{*WBb-~q>+5KTiNsB4f(mAcUxEpdO6!GYKn5%4o@}j0ykXx<+~)7)@9ef- z_?p67q``=iiV?woxmkIhP8|hZ-P*OSF;^+ST!o_S);Z^V7bLz2B(a&=)zZ=`dXSCh(PX<1CkT>=CmmE$vbL07?zA}Ku%&0`p6L)N4yt9Lo@zbdWjx1A4#{Gt8iHSp zd?@1(IK?l9HwI!zYA8f~9&j~+)gL?RsdIgb?>F+KWIEffb^%JhQr7g2a4|M6L0@8J zvyVpdk;fMhJ^JcjM4ukEnSdgylXH7K_r2IE<3IVjOwN&_{%ei&>UA0Ekb>$ZD}SbHCWw2S|?kf?uRR=q{2E@RE%a4D*Z=#}0Z!t+5i8e5@n4fx9N?CwsIrqiH z>Gz0j+Uvj=pOqTn==bjpxj*pf13=NmLjk)CcwBq5*aB);+ODFpaEU2_*?(yarvRTV z1&c;@j9FEwAs@+;TB9bZbcIz1HoYB?|1qA~uoKDuT(g3^I-UW=yMAh4E&|_G^ChB$ z-oM3TI^S)%`@Ks|q2=sWFm!%!qanoW?orMk0^_-khVx%Zj8)o~}185e># z!h88+_V5z~zbgC+p@E-fy#VkUR$6`6eXhrew(=CAal!I9nqJ>B(7m!ep@Vo}XbZ0#$ ztaEA~LO;u3h68ZK-=;#Rs{ZlIv!g`dCL3NhaNaDNSDXgPYS4*3SU>_`5$*d-hbG0| zSA2m!r35Ug;Me=J)l;=Tsverluk7WCgTli0mZwXN%~2{?c~08{i$L%qW(bv@nE34v z$@l7)|DbUz;I8So?2VPx61c_(^L~fCg6KpRk)QozpkQ`$XsGQ(ry+^lkfITKE6>o|&9n{ALdL zN|+lX7`tYGf|I6Y+*Sz)7#$w`_IcJb-WMjemvCuByC7T2x%Eg)Esm zTuezyi)-w0A-xTXIlp#!5RGFI^1s)Fxp!{o)N9dmU;ftot;EVZ+k-J5@L`T+#tg!0 zulEx)bts7C{0!Lx)Y@cY;G%e{FC_&DcMeg?P`4i##q=L@FRy}+Vaa0|`3?em@m(I3 z0fpsD2ZrO^ENlpGK3M@)|7xwxRBlfSAFwB&tH1tm1)~X1|HU5ZgsKke9Y8%FPq*3u z3_P1o55_IS+0yAqIdV@6-t>;r_;@N7ZmdxgCv>L5QVv`?Z9gbQEly>-w?{@!ZT739AEU%ZLjZ;+<_VmK zuisYNzJGk8=irGGD|DvHtn8nhPOwKk^8aVr$C?0;7!$)S(pA(>W{;v_Zk2oE{*;~f%26M4C0ewA?9gq z_V4ceS8Lj5%n$gyEYkw^xgLMp7hw!j)ufm|y6G)>3wJGsay%wYQX<^VQaoyihkIl z4jU(v|BfV9ZNa9nTIk0VakCN^{H~r_*vm?ceRAzb*Uw@%)80XLx{yM14^{!RwXPPD z=BWlD?ho8Fn!`QL*EClz90VU>dJkv5t<>gyt1<4h$`NvBFJ!u8i44y17xhuv9*F63 zjRcE2a=Pk*WaOCQnAW|HtB5c}Zq)kMuU^jm-;baETwqOBuD9f6DR3UG)L&fI(7@#3 z+SB>uH1RO{XzaXQ!7No7G&^)X9aWTOylX8e182n3emY;~l&jgu&CQLz4Yih-ETX{n zUwISo^>&T?9(Nh+{+2pb11@HXJP~eDVd2iK6Nxrw>KrLYn{Z;8QK6xa6>HP-sVMjl zKPh_7I(@?MzH&pyIiX)S3GwjgrMxRLoCLun3m>}g#r#{R>jtq35Z-Kdx_0T zi<4@#-lM1W-(fXB#gTCeTncy1qTgCCM9sh3?v_Lr0a63P`}(MGEjF_S=+tlafS-(^ zQ=!8~%`KfG)cx*PsA=}AHFcbY8A~*Wlwjm*Q7}6PJ>*`n#*&8ZKchZ~Z`F0yy(Kna zOs#CFPdAw{#6TgjIw{gel$&`-)C_l@2jHx0T1D0o#S34XqN;R`i&RP)B4;i=5&`aB zxp(~@|BEfnAudWIU6GD~p``m)_a*&&W2Ze|wp=zv0o&C}tSi-S^X*2g*qsi@y%PE9 zffPmuDoWx%@oWu8Rk)5HJAp8f&!a#|(d!t{k?!ITv`ph}-4ZKXx*5NXTJkH)I6mDR zT3G05I7ce4q9E8FQlguo1>3wn-F^#0B^TZ;$?-d)6C7lH3WgKm{pUM44$H9U` zAKyDyvg@fl^h5Ev!K06Jag?OYU4lJxPbmcZ37Ga4G-V}HKi1KM#DhOghz zwbbxf&4Yq03zu+`sk-41uy<__2i~xIeq&UwrqzS zumSZ14#vszj>#su#l>-1St1)i?dWuw$P{SqRenN!LHpUPZ~~T~=H7&dpODMX!@x2} zdUWsO|7i}$z)K#0VzavjG&?1`xym#YrNpdU_3E4M=d;EfpZeb{_*7;n_`@^YYEOVP z1wIAx6b;HRM3~V^%RmBY{^(!DyHrwo*cCL1mDDb6d}1tUh(y9wQv1_ASjmb)iGewh?yyfLJKI^n>*k9Ao&V^s@5gg7 zGD|e78`%pfAQZ&M(d%%nLi-t?5Z^bFz!zjHi2f%~;_0?YTxANz*&%VB)`HFL*KmU2y-cXbMkjYW0I#Y zBwS2>CUV$086QsN2EZ`)dgf#3fmWUx>2X%x9Z`4$K6e(omJ2zA!4Rlk-|qp{_$OU5 zAV&HjPMW;IrQyu3s{;3I^J;fA$FSS&bk#-lq1JuUz}}ONiRm!h10@Cp&KCYPJ3HGi zj)kbYw&ho^Lm~;1ZZK4LUT$fig+X9l_?KETg78RC@%3b=4^f6bghfS#a(95Z z*A%l#RFvr@jWg?W*iSz4^vk7VcdjrHWq@tE#Rw)mV1DCgb--0TSaon+$JCjV|I~++ zs0*}|*`=I+tX`jDPmeqY(k$x_Ko-h9aGkJ{T}IOi1dDG z@6&^Q$K`+A}29%e7|X9N~&RxiW(OCU(f^MvbdtQ6q(wUcfGqc`P;5z)!XZH zO^J?vy$7SMxVPt+m5X7_?(9@J0)dgMdUo;62x1WBL6d(oo87;ZiuRkcS!-oe^=Z7V zywX2oA8pT$JQ@5W)LeRV9KCBwOF0kV+j)5;1Fi~&>AIx1JvD!-u4%`qm8y4oiB=}S zg&a@+)uPeMG-kfq4u$KC9f}==S3Y&5#r!hzghLyuwX-P~HI;L)LVkBOxa)W{S7YGb zH*VDC1K{GirX6i9@zlkz$&(bt1xPqNT8y)m``uaTgMc-d~^Hgw+WMt*0rK7XSY9Z!A)AZST z=#-ymCq$}dd-RDlD02KSx9yftS74kB+`uPvX&f8Ue8b#QHn9#sVZ0 zDn$sJ0XsJ#YT$+>Mb7LgwHT+j;tgC*;v-Hp2dZ2zyr{g4sEV4+NuJgS3@9SX{TE`8 z{eJbTzC-7u)x9;~)<|XIAg69M=&_cEHGKIZje|=cgOVa8My3dEb5&SopjXZ25DbwS z!p{jE2!U$m*~M3B8G22K3gtlPDHZa#FeyMgp683{|E=@sGHu*yd~0?}O`N2Zg2coO zLO5?YQMYSb^)L{Yq3 z@QTWgCOK-G5Sk9`W-ekoFx1zqiI}J4=9C;=Mv2yG+BM`O_i= z<=MVx*EuayrDRdTAZ6AjA8DSZ;g^u+fLmDK*9Lty^e~)#gusgdlPheE`SFj+`Yq&y~^5FS-G*|tVpP0@de zHa@Tu1n(Ws9|5w|Xg9d-Jjzz1V6C?Y=RnP+WT2$CPk^=b5sR zz2L*hP0BfbR!}IDD6J#V>cH26@FfpF!pQf|B!Xy)A3&fh5>a8dlM1=FhfY}u)A-?S z*Fz@6W4H6+360O%Abf_L(*Qn&hdH!=Cp{!0fBDyHK0#qaSv5d&OUwVDnCqpU*+p1w zx!fk`7JRk*mbF4CQU0(nMsN1{Nvitczc9h$)hLH9@A-}{*!T7ez&*jqdmJjXxH;Xu z6qSLo+9L*+d5{RFth8}%mv*<#$3IFOU4`Wx^;2rakr5nxdlOkgUT2xD&1S``WCqQR zON}npbHJcS!-AG5^-yqW0M&vV!v47Z*HbqkoO&dtdP*%rc}_~~cnrSg(IT3Pcs8=( ziRLBOj~U7gsmg*Ftp=a+#&rtR{Vae8L3q15EF03-T}2*4McqweTdvPzl`s1ziwSjr z8%BR0OIoKWjEl=^ii3mWJNJNE-j?SQ8MWF*Z5N>LiktxiQFMbOVPQmY`^7z^Dhm8VE|v-%H2zkP{L+bH{xCr@%+95} zLtDs}?qGZSfw#`q@Big7naDupC-uc=b5#amfJbiHG#_CvDS8C!ndw-lj8qo_0tr^{bU=3d~96aX}=6e`G^*?1Oe4fpj@jg z5dQa~gD}b6@M?N``e&0my@lSTmY-X3Myi9~5P~*3O7~>mm6R0(#_S0Z0M&M}(`9~w z^|zapx<&WvUB*%I*)KY!VpL#G#-?K>m;bhvgIcA0r?7T%soxjP5(Y0&Z3~0D1GUxR zfZ)9Q*_dLw>=O_(Lry#*oFi9&@Y((?om}(3f9+bp$Ze*se#pJSBq)S)EiGcx3H5 zZ6O=NOp5n#D;!!-J`@IXzLZb!54XaZ3zE5%3!THi26)rh()YD$jH|x%wY{&2mxTPv zA=+f_0yurAIlw3CHhiYwG<@EW9wj(1smc5qapViEh=^K&xtwQj0KB^+BS>|5wBQDu zQD{d-f<9^e?gvkY5ge&?S>*NhRQGt5t7Ui8#G~NiB_jVcrGRm7$@9mO)cG87;nG6+ zb>VZB4o}a5X!YP5V1&8qk@JX&bxS)^JdB?szdLok7+Wwstq{ERaI^raS8>9))(wW2 zUeS#ath88Gkx%>5&w=b*)0IMwCg8mRWs)Vbx7(CjomoXrP&)fGBhHS~&Y^XwKbS<(3u(qY!b#29$Z6Sop?4xHpdMZTeGP*^FCQp1)*NlEUdx*}E#|$xh zKYqgg&ggrcJ)~q*Xw`Gxh~)&`o6Sv4_EV9mnOS{7J`61z>D61S31vm8g`|$OPbsyw zJZAM&VI)(WIF-U{pre1w25Fi6q&D{Bt>R$e8x^c%Dzxh3)qVF{FGr)u$QHgiRl|p=jOn8uSW7O7eHu0rV+6*)bt!5>!UZ7Kg#yh;YJ z8XIY1)1@YHuZ$FatxuyQPe2sX%r?f5n%uNMaf>_s6MNY%o^s1pU#`3+(mMP8?rzal ztrFf3Ev!;3-bi%nPW)~s)k7ywj~C>7)g$^_F~aztop56BPfE&8d|zR{`#uEIpq92y z6!(GVDPj(SFtBo8IQ`sb_r|}q-yd`ZUVR_#-5rUn;jg{g3=yUi5HRCzPfgpJ)%56_ z{4yh1!MNo~dZVyer>^ADu_{o`?|t_b)Wk@uc%Q&bMHkO3AWvBsLPC8T)vllp($vp) z?G*Dc{)2B$MYUk+#bf;qt}Skcc5SEtS25YR~<1cc&E7Jt>FeEYY?eg&9u3@}8>oJiPP zubX(PNXOiyLT;5~eDVZ>;X1LJugun#5|7#|n)GqlsQB&U#mYMB3gRJe_jTsl;MBL7 z=LfdY=%Yl6`QP!ql!S~vT6GQi*xjxkIzh>`?-pQVk0$b6al)mN~5{nwLmfCrXqF8;+lG?fxKf`~As;v!Ic7 ze~yzx{6E{DgNk|uJYQ{DnLiTl-~Z)S?e50~O9qP4-^4(EF``>Dy>ARDV-$ZfVVWAS zYAAnudjL|LHCWLSI85;L_Y85tQNMCD;%Qzl5Ze#h8$VY?nwJMczjnLQxN+z?R$|nl zLi>#N$4QTH;Axq_!tV(!tHNuuk*bP>uDjGErD@Kcj4zpPks zZ0%gH)0i{s-i~;AaYO zRC|rVPgSj}EoI#CQww@~VXzCp?H2Y+++xAhQX50h2wKIRE9W_rx>POLvH`NhJpe@5 zH}<{#S7Bbh<=i&mPpyT;nULBaXQFm%n}!SiY=72;dNSAsQ_e4ZAjwE1hj$$3jMrqp z92tS9m^a>qO(D-xdUJClHJ-uuZoc%}Cg`I56GS%}BJvWRqa-zO=1BOR&|UtmVT=w> zkUdFlGdTtO>kG!F>w-30{l%nMNp&wi2%{hzb&O$G$n?${2}3~==BeUhE#j1O5sO|xgzR_HTuawsHjKz506WHL!BE}9bK_ngm+MrOk9;W8YzJn^ zo(@rEvnHh9kPxz%1Qe`QK3zuADOi7CIEtoMia;vr-=RTJ^hen$>v?- zDC184Idpy$Cb`!v*ow1-K~lx}YC83|2K+uH2L!6C05cb50h(y09-OBJjSB>|mQ(KMq*wdO({?jKdUM{;BVqO)UGIgmGjmAiJI$rIRU&+PXlGAB1pT>U2rbV#f zD@D+bNITF%=s~z7IzD|Lp*06chK?vLkGNADlYXb6p&>{@~KkQdvC}GOnC- ztGENQNKVlR-A-}!=R!}(wi@-vA6N(vv}A-$AN=25Y=E4F-WWO~CU<1)l=;W?NSF}V zM);<(vNFqwe>KtoM0_iq-(X%XO8NKiUsub|>a*Pxb%~mS_MLhq*A!#FGR7TQjr&NJ zR6{=*BZ}6oD;C$3cpJ(|D6jV6TvI$q`NdA~lJR zA_7H>71-?7!IpKcu-xLb9R!@6%o!kK?3tfH%~kLqzj=@~aovXr)r_C2Ry1BDEUw%` z6;&y}f)<&$vy-6vxAT*3iOx48HVvKc@;32h> zwjJ>y@ie|%7sjmE64jYLl05G)M30`@e9~Ax(|=z$=uZ<}cIv zOhADGGYh658(97qC-W`K^`&p)g2BTly;n=HJE(ou8Be>FaF`Y#ylG28eC zr@jG+f8{!>lca$^(ub32y5HAK?^a^x%&4xPLs8GNEELB02yfVJ*8~_z*-xTMxT9sw z^IAr8uYEG+*X)Hj^lm1$d+L9|(A7pX$u=(} zFBm@fKr~*>TxRo#8@6w2>^iMbpF26%eN@hS5{P-Y+J0ZG4X!>L+S4ys571c@%0cWn zdww3I z0hj7;JTMS6?yMl;dAJ2q`*eN^5x4zS(UGcx*B?cl|6~}r#!4Kr>DTWl_(al^qsxC{ z60J`co&HJG(W7Ogql2_x zG(ya{)5hH@DXl;JucNc@YwGW#xO8_ng2YH^q+xV}G@~1-0n#u5>4qWvkWL8!K~h3W zx?7MKt#pp`xjy>~w!7CiKIgp8xwH(FjcwR?5WI*Ud5myV+igqL#nf*-%6kk{oeQz@ zb6hgopW=z|=1Rpn?!;=5+gg@dXG%o<-M)D7(*+7T(_dY-A_0%!G3%#D2G}U%*&-o> zsM~xl5laaY(3v7YUGvovz3M5^h(@!|Z|N-Zp0LQ747+OZl;X>g#WCrK$S`0Go9+Sd zcO6X~oxcjcgtU~h_5!z;Vi;&CCXv_8nyk2~4wGijbZv?4Ltv=&N;Xi8FVS)I?F*5A z7__neutv-3$B7hflp&4~Aaevxe*U{b*{&gM!-Md2>eL`c{0A`9AG*DuKQ(o#L4HL5 zlB6=)pNVCds=zwE2F$5f{83exa-g!ZW>KO3p~G=nTKzerZEw|sWdTvrr;U#mN%0t- zUbm^4fMkpusNvb+7mQ%DPtXCE2!eNt`Vk2OjWv=%O z_k1U}vbANo*AHLkp=R69d!0P>hSwtLJcbHwds>$~ZT7&RGa2`<5f#6vE_*4d`*vu6 zcUojt{?mIiSIzCki!BNq9jo_vMvUnEf-=aB$VFhkx|WcnWPYFJ< zGr)V@PFi`$tStbE8sw$Kq6OT>Mf;;9YeJIf0nV|Y4!#B8k71)#*GA({*F5}YzF!1v zn7bPQh%TDUBpdcMKpX+*l=<*I=*qK(>y9Ax2gdski(I_f3^)?lAJ&$>*mXbLbpMrB zmonf3B;E3_zbp5`9a&us9D{DUuF#-25ydR+b=F>+M@PWE-^KFgL}0pY)jEFCdP*{z zw$G{Ma?S*chp~_r)n}Hi4N}Wx&u)eP#q4vIjbqY-&+UQ!D_~rCU77pJhkXkO82tD5 z;X-F&ID^BSK{o6`0=t}*1=i%yWQ*Uq_HKC5phZ<&yCP$pgPO;e={me_bZ4&Z*YEBt z!-Ni}}`fX5NZAh8+v z{Fe@S2xB^p)HCO{RPFbr!UeM=<>)BG{q^2UpVna+zZZn%$retj23q746u`~^1;K)7 zRE!ZAXLf<%J{{m=%vkyP`QyTjpI?|PH8^Ns>4((7-l?!FTFSx0wxsg+AH;@6Mn;x9 zgZ=TYS=FaHF=AJav`b&g7kzYISDgG1%S!>lJF+X*6W!#*6=bJvT6Sa0NRGD@P;IM- z`-ZQn&2@afSN>3bLyeDl7y!sQ=|J_0UvjfkdU`spb1=X!Z>K%5DndUeHc(6X?*9gS z*{l9S^Sl*SK368G7(#~M%K))l*IyuihWP5zSP*hPyvdpE9g)YtZnAxYtsHn;A})7Y zI--qpOLo!Fla|HNBeSe%Q)U7r|aH1zZ4w z;LSPr{uvOe@SyJmS{nWUIMxT9u;0TunrsesT%4Tssib1Bv!%${!&eJS(bavan&GFK zK+22(c=nHP&+$Q_qRa%`6PeK;3(!iB<-!=zJvU1p&g0n zuum9B%gu4sP}lSYvJHEhnOrUy^VN$(8ft2wPlk=VXBvEM+b=;=1w;IX31Xlkkj&fu zNJL}-uX*!W==HMG-v5Q**qCC4IE03_#;arRbTy&@6&;4L;zdTm%$SteT*ODfw#z#1 zh9S`p!Ftc#AOSX+%HoqeUaV}r&D0;z{M!&2mV@ElQfM1tx)FsyDp5CNd^v?V@cyX9 zib*=?QoJTOEjxRtTmi9}xkh&b?}d#PWF)Zf|E_7`Z^>m z?(IN~gD#z@TYxjbn{mew&|SCg(~GEAIH#V*)_*%B`9m~1Dw%;un!1Q0p+SuzHg~bt z5W?;4os^Fl$xObKAUvrc-lxTeGlumMnb;k^peO|Z0-9o*cAv6@db#HrnH7yxfde*8 z0!8dxWVu8=uO7r|dxX5+kXv()hs>;>I~fM8HONu#MBF2uh^6opY1!F*8IMzfH}SA) zj-$)*Od}s9=w#Qb;SZe0 zuJ3WUl&Cp~@lBs=1O);VPI*F1iFWIbbn#N2--$0Z6r2AAXZETf0vFA^W|^6yQeTg> zGCkZYilS=hk%;P=9*~)Eb-C7d`J+Cf-}o#sKr0j>+F}a_c9SM+iV~r=1eWZ;dD8GV zipnxu%z3JiV*pGMQHQ9uPRQp@iLXZv8Ij#6fL$3>DXdCUsqmY6DVSrv#em~jI50S= z)+Vt`*h_Z3IySFc34*{6QIeFYyCUy8qFY*0WbXEgqc?^Gfq~QE{o&Lin zc>v)n^<=Ff+wfpP|0OrvaK`J6{ArgSJf7Z!RA0Uz&tQ9T6hmoEWq*4411YwriHk9X zW5)2ni92Z`k8K%?3P^JYMBgUa!ddp{f9?ZE&pUx|&Zx&#Ea8}|byUjl)!)^wRDV!} zjuZHu5Oe(i_y7Z_m~)Mi61$&l6cs3Z#u(?4$p;6W$!V}GJ7R;?D?^nzKxt5kF|18> zmX?$U8;8okPfEvT*S_IWrpj3(CDJL8=CXJ`DmGrU14d}win*dE@2&4WOs8=h522OF z8$ZUbri)}t5(_k-P$UHT&BreS0s;YMBJl0axN4<1@yocfM)2=iQM!h*#i59JRV@}; z96+H~Z?bO1R?Z;HG*;r2u~*=fnfno<#4lp-*(j6XTq&ci!D(OabZ;+;j#5o8&2T=8 z%p1MKg^HV93`E@Yz8HynK56=rEmL2Ky~!>6?Q6^-osQQ`;Lk`Is)^gyDtg>BI@0<} zfX2OXs4)r3&2B)l)Tu|tm0bRchT!p<@0(xb>IX*qyR^_`aPUL~jT3jwj)=<-{xGBP3o#pSK?U3j{*8EKw zQ21iGSYo_BZWVH-#^Lu8y3A!HT_%=a%%ryTs~A}bNOO&2>2A^6`@#7lr>AVNy?h`zl(TRvg$0f_P)IJ z5cf@@Fb^UesF9~=U=2LwtY%5dKUp3DM@`Zg~BF&9q>2Lzyz1?5SBa77Wfd)CMiZn2^yE}!S==F^U-qf z@+num>J%My&V`0?$JdWL$~@kPejKJzAsPH17;KIYS-Pc8S8pLkH2FI2D1JkP> zo8ual3y|^%CHCAMWvC)Cj8Q0LPin0$de_I;rb7pQ4$q?dqH1Yr>B>XR5P~65iox&{vc39c+w(VgQrWVbx$708qCb~k0BLn84hc*G zkOs%%hyDA0l}mlS;_O^-rrc0L7EIy?#=#!2%Z@6Ab&+CoC?^K?E07Wn4_Ar??E7tf zb0e!REhEJS7K9_z=mo$Bni_>vqXfSj+VJ8J2|eR zpJ#J#2wR~^IqH232vZHg&d#sFS2Vz!c=@6k=B?nV|RKa#Ft?IE|VU%!3@BPs>7fUz#w41*MBiP_LZ$oB+b8Rlm> z^t9ZzHrfBAxIc!+5*Z>$QdUYhG}!E`xu1I!ba+2+I)6TNF)k>0-3=T(Omm;jZ{D1J z&E{bD$w)_Js05Ur)#k;&pX>8Ae)NunTf_}ikUy!Crf@M!2J4iM9%)m;qLp42IJ?04 zshd$!#KxTB>^0{@L8E3ESEE_MaU+sX3(;bUSA}+Nk7B&`Q&(bC zzO-zWK2{g_aIAp7uEM%*d%c~^Ao(Qzr*F9>D5am{(EJyxO0sXIRrgcSG%sgi(M!wE zi$|fNu(N~skNZwbKZbx9LdPqQL*#Ug_ugYjF{uEyvSZrwju3Gy9_{S4RRuT~ePn3? z@v-KzYVt)Wgb1dqE6vwZt7K#}0mNN@*$S8^H;@^e0{vE|N>NdU(gTd4Un)#=HU$A@ zhKD51fi)Dj^VjCLi9djPS`FO#zx_J)$?fH?CRdkDn7eDHq{!sqz$vAPI>W-IDG4QF@Slc$tbe@ETWtF> zMZA!&B%W<8N>)lXNg8Bv0W=EIE!HvUe>M|ocKvAPIF^03i#&siDQ-W0KVSD=3)o8f z51@&s#Lt{o&jCbYk{Yzs^$&8A9c%V|dPK2lY za@Ru$pSa3H^6Q|y6xCYy+1jHCzF+O`|4F$2yjQoGE&Zz7Lk#ioPwzXZaco-VCOvxNN5OT`lx+SUUc)I)J$&zOh6mMS1xuaLvakXGB1b;{p z>+#vRy}kJYDP5mXfFr&aQ18?7A(O;+?pv8aolGh~OPoGP^Ne#0!8)+mY10?|W*;RI z^NQg2#n&7WU#H|exw^0=%JnZ>Dl6mJuYBevyB{Euylms@M)`wof(=cOjEvWA)FwbE zz|3CwKa1hYVbO+>L&t<=8}dXx`IpEJb&-!_)Ry|B<^75YnTO!vlYW-Q;rAjo7fH$` z<8GpR#+CXzg#jk4Ki&<90)$o!NUM`lV#gfhbrIuucB>N9DePf$Wit}sN|JvE*VpYs^ zMOcVNgpA*1Ym%#bUj3eD$Of~q+7Ap?ltJ5;gnwq*)(RydsgZ|B3N5B!^ z43L-x`7+WpHez}r2|BDlQ?EFVEwa5ZC~yKG-m+Tk8MQzVBoG*U-l)j-h1t+l*H<)~ zC8WO%K;g<{hKlDRS2bRT(C(-M?Vw3i+Q!)H3KeB#w>5zzb>!p}ru92`Em^V7U!ZA5 z1a65{gm$KrOj!G)Xqb`VBJKj(b-;b)V;2&U@Nqd@9Y2AoVzyGm5UB5DJ|#tyyS4EW zv~<)ZyBTsS2wnKEMsEH>?+2ab02SivYE9#RTtr4i&OoUe{tW8y$R|$ysj(rhGi9oP=oObA78hXk-1q7+e`J_;w&-eXd_Rx@Xva z$lxKwRWAZ0Gjlh?hXOR00TeQ=Q@}brr=?xW&iI2^4?u&;M6y$Ekh#C?@r*m|lHv;p zGx3)qocVG0afc?XAaC@0U8u;EfBj5UaBKby`cks3`=I2;p=Y8$Mpe=?hP%z_mN^P#P8G; z>uO*-7%m4;6GyM$)o|m%vq)!`H_mX$A~TQd4;rgUZ$7yMwVH&mR%1TV(84X1FUDkz zU{D*gAo_CT(mhUy2Y2Q()slQcyA3>H;ok4|f|X|V7OqF{MjCdNJGtu~XcPP{aG!SfPt}I)#K4( zjGK@G&%K(EJoyOM21;E@hV0@T#T#}X#mN0{eWF3I1FSmk%A6ih7i z>C-i)wbsO31D^ttM({vg>O!ipvHfYCU%@Be-+B;H{+0jB0FphhmL?UvcK+j@B{Jds z;ePG@q~mF$n=#=1D`#DDz46m|V=L*O)yDi|VGxBvvrHz>nNqdmwa(y=jZ9E<>6W;h zXY5rZWG+mz4IiBo0x?V-tQ(iB>dBNKzK~U3jM|>_baIYt`F?Oy!>GORodG7Me7#=c zt=~i})MUg0{i#>>j05~|^p-3mwu+hPm(`WT^Rs>colyZ|TnFxlB^mjM=(a43oc1Y{ z4o8^d2>ewBhfbH(1IOkvJ1Bnq%bW80LR?D(1 z+m)n><>8gcZPms4XE^X%Yh@<~2Zxi@wnnzqTy3YO;^JZnqv|FzKtHFrb8nEOr|b~n zYoKJ?YMDJC1tJ_9-r^XQDL}klCPC|8Vg<(&vCkOft0_rO*sVp;UZ<0!edDw)jCzc9 zEs#p_glJo=F`doIlhX~z7TgO3`*!)Oycf)AHKq+CsKqiN;pvH_Ii0iS+gnFX?MvK5V*hr3`Z3;G zmgV6qSg1|UEfNF%LnbT>1H`*ax4T%vp3N;HGVchCAwY^-Lc<&@s^ld=(8o64s;Q|d zZ9l2ZR~e@0nh~^JOh{HJ96VpIGz`fA3Uo>Uers3e%&BZMMfPt8biQgnwCmPSBf}2H zRM-A?WkHm;QZr|qa&f9CQk0NJ3JDs?q8j_YZEX|(Zv!Or0D=fviei}j{q${!zPSbJ z$6ufHN>&HiHIcRhK#fzR)kjhH--*1&RP5SlgJ1l477QAN z9SET?P6o)lry7-Mw;-oVSNZ|P4?7j#P^YvyI6W8&z}I-Zh!K7&-D_wq_BxHnyjRs| z?IY;cbJ?J+(lv!qbn)&$Ehwqt5g7`f(+#+37|9nSniC^n!1glqLrSO-RdH}LOE^+U zwjXr>mq}2HOd?u#TE~*O6eEIla^u_!3dcny!h2A%@os}0N_9=1V96$7*2xd!;y>=j ze|wRLaKH7e)JJ=ec(xMWd=W%p$aG6&RQ*K{D36Dw8R2fw|Viue6?|eB^t3Jw5u2duo*c)Zv z>D@l$Mg650jk{NuaLK3;@bht_-WA|!XmPDFM4_|1-Fs3{?*-qVZ++MLzzM_q!Anrk zi&2d=`!{y7?_?Ciu6p9HUt?5O8R|}7R3MeyyGz4ki-k}44r8>z3+!5fjslqCvNkNS2`W+@jitE{%Y=v5kpeav`{WyGDIG6u<$TlBU` zldIoj2E}2;b+;N1-@aJQO9EB>VqDR_V~cD|7J)FDH0-d)wWOH6b&&y^N2OC#lL;Enuj+L^HCP4WYmhJ`aB*?jTjGysNNS~eWr9-j*`U=nma5eoeI}GSROT?|-0k5GFHK8tn@lp}F zI0sNz0q$)>om%%UvCs6>C-l=S=4|cIxlek3RmRuUr6=^j44P+p?4XB9pGiJ-&((eA z=z>BpWG$v&@)s1p_cOzqReze90;FTZNdJrx&+LV9G|LVFMw;5E_ralG^)y*w5pQ1T z=NAShgvptNFi84I2?O1rYCxYTfwCH72e8@;Iya-~2<@0W^+bdb6qjf6w?KY2a0O^S zXClrM5mvX}LW%>}F~Pq~5yrn*2HhHH40N^P^VDw5OsBVcp$`#sW)*)q)y%R{1UCl- z_|W3le+UnuFa7xk4W*ikKBALI<>+zypn~rz66=;+YZd6X5xq~~?+Pe%9MAdI52mGM zL&Epw=H~kTl@P|dc>ZfkZAGOB1lmcv?*ZudWEP&UY>G4q$^BPi{x3sx0t8pulJ@)W z&Ptn_S8dbGeZAG!2U;eG?4d)v6M&M%HR>%ATVRROWldTO8YSIa9NtR!yGuz7OP0II zitvGke(5Jv-s>QjS@_H|RZxcMA|7C}Gr5~$ov#t^Ta!N{%6u6z7ra|t;JClt%ZCj)3%hPz3y&!3ouPdDW$Dx(OXA~yxTJS7RRlip%HZlYF%EG) z=InU{RUeodi3dmx_w~RoD)=>^m=XRA@V7A7WSue;3M(}iS@hZ78+qJ;?{v=uJ`rmw z=Y5=+OX8PFZC>SuDBoLnbOo*e5v9lJe<7rE#;GaJkAf6TPCpW9JVSv_$2=l>@$cnb z@|X_#RVnIu1Dj`g?jYuH+-UkI$jqOO==CLUJUr$X;p9{|9J(RUQcCLXAL~sIDP+c_ z3>aEl)K+=JS<(YdcqR%s3Y590zM4^H#Az|~+hM#Gjr*3^->jmnNJ&9orC<6~+kYTlx$?kDpJW3f?cWihqAO((1xTd|` z{y`|Vd-;3$Vym>YlojDZODj(TM{GSP;GSmaWL-T5U6iu}_vmg*9Yela73CkfPtSxh zgk5IryMa;-(=o@a<@9}FTuhyL*LgfOnl`|PIYQ>8=ejn%Fr^0#gN(-d9EmSI$o)q8 zjea(?ZFtF0y!^(D&Bui_0t%$z9*R1+_3vLS422BlSLSu>*ngKCWZI zeM_0{gM68b`RS7ej{O8Jb#~LT&z@K;3?;$_S39!O-ILyE=Sb$iq`*4_9yoR~IdR#; zDO8u)(iJrO7l5}AIKN?!id$#Ta@*Z%t?J`~D@W Yf1tfH-Mc>rUajy3;re+?^ZRsaA1 literal 0 HcmV?d00001 From 72c873dd6d041c2b01bad1afbbd7e182b9e296a3 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 8 Aug 2024 02:43:52 -0700 Subject: [PATCH 057/163] update --- core/amber/src/main/python/core/runnables/main_loop.py | 1 - .../scala/edu/uci/ics/texera/workflow/common/Marker.scala | 2 ++ .../texera/workflow/operators/state/StateToDataOpExec.scala | 5 +++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index 86a91aa9273..b80ba85ea1a 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -194,7 +194,6 @@ def process_tuple_with_udf(self) -> Iterator[Optional[Tuple]]: self._switch_context() yield self.context.tuple_processing_manager.get_output_tuple() self._check_and_process_control() - self._switch_context() yield self.context.tuple_processing_manager.get_output_state() def _process_control_element(self, control_element: ControlElement) -> None: diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index 6fca9d37579..5eff70413da 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -40,6 +40,8 @@ final case class State() extends Marker { this } + def size: Int = list.size + override def toString: String = list.map { case (key, (_, value)) => s"$key: $value" }.mkString(", ") } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala index b619f40eedc..692309a9468 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala @@ -10,7 +10,8 @@ class StateToDataOpExec extends OperatorExecutor { private var stateTuple: Tuple = _ override def processState(state: State, port: Int): State = { - stateTuple = state.toTuple + if (state.size > 0) + stateTuple = state.toTuple State() } @@ -27,5 +28,5 @@ class StateToDataOpExec extends OperatorExecutor { } } - override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = ??? + override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = throw new NotImplementedError() } From b9dd6abfebb3bc7f5b0d83a1bcecac7e6b075392 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 8 Aug 2024 19:52:20 -0700 Subject: [PATCH 058/163] update --- .../ics/amber/engine/architecture/worker/DataProcessor.scala | 2 +- .../ics/texera/workflow/common/operators/OperatorExecutor.scala | 2 +- .../workflow/operators/hashJoin/HashJoinBuildOpExec.scala | 2 +- .../ics/texera/workflow/operators/state/DataToStateOpExec.scala | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index a9fb39d899b..284161b0a6a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -105,7 +105,7 @@ class DataProcessor( outputManager.outputIterator.setTupleOutput( executor.onInputFinishMultiPort(port) ) - val outputState = executor.produceState(port) + val outputState = executor.onFinishProduceState(port) if (outputState!= null) { outputManager.emitMarker(outputState) } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala index 89a46aaf98a..02499b53fe6 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala @@ -20,7 +20,7 @@ trait OperatorExecutor { def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] - def produceState(port: Int): State = null + def onFinishProduceState(port: Int): State = null def onInputFinishMultiPort(port: Int): Iterator[(TupleLike, Option[PortIdentity])] = { onFinish(port).map(t => (t, None)) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala index e6f294de4a5..3c27644d060 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala @@ -21,7 +21,7 @@ class HashJoinBuildOpExec[K](buildAttributeName: String) extends OperatorExecuto } - override def produceState(prot: Int): State = { + override def onFinishProduceState(prot: Int): State = { val state = State() state.add("hashtable", AttributeType.ANY, buildTableHashMap) state diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala index 4d465a47028..0ae7ba89e59 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala @@ -22,7 +22,7 @@ class DataToStateOpExec extends OperatorExecutor { Iterator() } - override def produceState(port: Int): State = State().fromTuple(stateTuple) + override def onFinishProduceState(port: Int): State = State().fromTuple(stateTuple) override def onFinish(port: Int): Iterator[TupleLike] = buffer.iterator } From 0ee5fd349b6511cab8d66fd2b91fd3c481d91d7f Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Fri, 9 Aug 2024 15:14:18 -0700 Subject: [PATCH 059/163] update --- .../workflow/operators/state/StateToDataOpDesc.scala | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala index f46f319bf46..cedbcd4ad59 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala @@ -36,10 +36,9 @@ class StateToDataOpDesc extends LogicalOp { .withParallelizable(false) .withPropagateSchema( SchemaPropagationFunc(inputSchemas => - operatorInfo.outputPorts - .map(_.id) - .map(id => id -> inputSchemas(operatorInfo.inputPorts.head.id)) - .toMap + operatorInfo.inputPorts.zipWithIndex.map { + case (port, index) => PortIdentity(index) -> inputSchemas(port.id) + }.toMap ) ) } From d0acbbb458868923198b8601ec47f41a83baf8a4 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sat, 10 Aug 2024 02:40:12 -0700 Subject: [PATCH 060/163] update --- .../texera/workflow/operators/state/StateToDataOpDesc.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala index cedbcd4ad59..a275a525bde 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala @@ -36,8 +36,10 @@ class StateToDataOpDesc extends LogicalOp { .withParallelizable(false) .withPropagateSchema( SchemaPropagationFunc(inputSchemas => - operatorInfo.inputPorts.zipWithIndex.map { - case (port, index) => PortIdentity(index) -> inputSchemas(port.id) + getOutputSchemas( + operatorInfo.inputPorts.map(port => inputSchemas(port.id)).toArray + ).zipWithIndex.map { + case (schema, index) => PortIdentity(index) -> schema }.toMap ) ) From 66b39286bc7f1ee945669e63c6330ffcf9294dff Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sat, 10 Aug 2024 03:11:46 -0700 Subject: [PATCH 061/163] update --- .../workflow/operators/state/DataToStateOpDesc.scala | 8 +++++--- .../workflow/operators/state/StateToDataOpDesc.scala | 1 - 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala index 1466f3958e6..04d57d87a8c 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala @@ -1,6 +1,6 @@ package edu.uci.ics.texera.workflow.operators.state -import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp +import edu.uci.ics.amber.engine.architecture.deploysemantics.{PhysicalOp, SchemaPropagationFunc} import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort, PortIdentity} @@ -24,7 +24,9 @@ class DataToStateOpDesc extends LogicalOp { ) .withInputPorts(operatorInfo.inputPorts) .withOutputPorts(operatorInfo.outputPorts) - .withParallelizable(false) + .withPropagateSchema( + SchemaPropagationFunc(inputSchemas => Map(PortIdentity() -> inputSchemas(PortIdentity(1)))) + ) } override def operatorInfo: OperatorInfo = @@ -39,5 +41,5 @@ class DataToStateOpDesc extends LogicalOp { outputPorts = List(OutputPort()) ) - override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(0) + override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(1) } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala index a275a525bde..ee0bbba707a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala @@ -33,7 +33,6 @@ class StateToDataOpDesc extends LogicalOp { ) .withInputPorts(operatorInfo.inputPorts) .withOutputPorts(operatorInfo.outputPorts) - .withParallelizable(false) .withPropagateSchema( SchemaPropagationFunc(inputSchemas => getOutputSchemas( From 78f3259cd5f6f54fda03e1501905bde208133439 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sat, 10 Aug 2024 03:58:59 -0700 Subject: [PATCH 062/163] update --- .../ics/texera/workflow/operators/state/DataToStateOpDesc.scala | 1 + .../ics/texera/workflow/operators/state/StateToDataOpDesc.scala | 1 + 2 files changed, 2 insertions(+) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala index 04d57d87a8c..c8733f410ef 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala @@ -27,6 +27,7 @@ class DataToStateOpDesc extends LogicalOp { .withPropagateSchema( SchemaPropagationFunc(inputSchemas => Map(PortIdentity() -> inputSchemas(PortIdentity(1)))) ) + .withSuggestedWorkerNum(1) } override def operatorInfo: OperatorInfo = diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala index ee0bbba707a..053dafe92c1 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala @@ -42,6 +42,7 @@ class StateToDataOpDesc extends LogicalOp { }.toMap ) ) + .withSuggestedWorkerNum(1) } override def operatorInfo: OperatorInfo = From dfcfa69410f7005164f8a5b82bd0cba71993d8d9 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 22 Aug 2024 00:36:26 -0700 Subject: [PATCH 063/163] fix fmt --- .../edu/uci/ics/texera/web/JsonTest.scala | 34 +++++++------------ 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/JsonTest.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/JsonTest.scala index e9b196b1caf..b0a19fa4418 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/JsonTest.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/JsonTest.scala @@ -1,30 +1,22 @@ package edu.uci.ics.texera.web -import edu.uci.ics.amber.engine.common.{AmberRuntime, SerializedState} - -import scala.collection.mutable +import edu.uci.ics.texera.Utils +import edu.uci.ics.texera.web.workflowruntimestate.WorkflowAggregatedState +import edu.uci.ics.texera.web.workflowruntimestate.WorkflowAggregatedState.RUNNING object JsonTest { def main(args: Array[String]): Unit = { - AmberRuntime.startActorWorker(None) - val testObjs = Array( - Map(1 -> "123", 3 -> "1231234"), - mutable.HashMap[String, Any]("name" -> "peter", "mail" -> "peter@uci.edu", "grade" -> 4.0), - Array(1,2,3, 4.0, "good", "bad", 8) - ) - testObjs.foreach{ - obj => - val strRepr = SerializedState.fromObjectToString(obj) - val objRepr = SerializedState.stringToObject(strRepr) - objRepr match { - case value: Array[_] => - println(value.mkString(",")) - case _ => - println(objRepr) - } - } + val a = RUNNING + val om = Utils.objectMapper + + val str = om.writeValueAsString(a) + println(str) + + val des = om.readValue(str, classOf[WorkflowAggregatedState]) + println(des) + } } -class JsonTest {} +class JsonTest {} \ No newline at end of file From daf761546ea29d3e273e722e5be296402bf14d99 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 22 Aug 2024 00:37:45 -0700 Subject: [PATCH 064/163] fix fmt --- .../edu/uci/ics/amber/engine/common/SinkOperatorExecutor.scala | 2 +- .../uci/ics/amber/engine/common/SourceOperatorExecutor.scala | 2 +- .../ics/texera/workflow/common/operators/OperatorExecutor.scala | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SinkOperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SinkOperatorExecutor.scala index 052b90a0816..ac63e1f8409 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SinkOperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SinkOperatorExecutor.scala @@ -15,7 +15,7 @@ trait SinkOperatorExecutor extends OperatorExecutor { Iterator.empty } - override def onInputFinishMultiPort(port: Int): Iterator[(TupleLike, Option[PortIdentity])] = + override def onFinishMultiPort(port: Int): Iterator[(TupleLike, Option[PortIdentity])] = Iterator.empty def consumeTuple(tuple: Tuple, input: Int): Unit diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SourceOperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SourceOperatorExecutor.scala index 958022fd235..33f2a71730a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SourceOperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SourceOperatorExecutor.scala @@ -21,7 +21,7 @@ trait SourceOperatorExecutor extends OperatorExecutor { def produceTuple(): Iterator[TupleLike] - override def onInputFinishMultiPort(port: Int): Iterator[(TupleLike, Option[PortIdentity])] = { + override def onFinishMultiPort(port: Int): Iterator[(TupleLike, Option[PortIdentity])] = { // We assume there is only one input port for source operators. The current assumption // makes produceTuple to be invoked on each input port finish. // We should move this to onFinishAllPorts later. diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala index 32e564d04f6..28b5dc98509 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala @@ -22,7 +22,7 @@ trait OperatorExecutor { def onFinishProduceState(port: Int): State = null - def onInputFinishMultiPort(port: Int): Iterator[(TupleLike, Option[PortIdentity])] = { + def onFinishMultiPort(port: Int): Iterator[(TupleLike, Option[PortIdentity])] = { onFinish(port).map(t => (t, None)) } From 1add5d23cba371e73706331d61f5915d3a18894c Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 22 Aug 2024 00:38:15 -0700 Subject: [PATCH 065/163] fix fmt --- .../ics/amber/engine/common/SerializedState.scala | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SerializedState.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SerializedState.scala index 870eb3f23f5..0ba1d11b864 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SerializedState.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SerializedState.scala @@ -16,16 +16,6 @@ object SerializedState { val manifest = Serializers.manifestFor(ser, obj) SerializedState(bytes, ser.identifier, manifest) } - - def fromObjectToString[T <: AnyRef](obj:T):String = { - val serializedObj = fromObject(obj, AmberRuntime.serde) - s"${serializedObj.manifest}amber_serialization${serializedObj.serializerId}amber_serialization" + (serializedObj.bytes.map(_.toChar)).mkString - } - - def stringToObject(str:String):AnyRef = { - val fields = str.split("amber_serialization") - SerializedState(fields(2).map(_.toByte).toArray,fields(1).toInt,fields(0)).toObject[AnyRef](AmberRuntime.serde) - } } case class SerializedState(bytes: Array[Byte], serializerId: Int, manifest: String) { @@ -37,4 +27,4 @@ case class SerializedState(bytes: Array[Byte], serializerId: Int, manifest: Stri def size(): Long = { bytes.length } -} +} \ No newline at end of file From 11381c07067d8cd96a0cb73093038223782aba84 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 22 Aug 2024 02:01:26 -0700 Subject: [PATCH 066/163] update --- .../pythonworker/PythonProxyServer.scala | 4 ++-- .../architecture/worker/DataProcessor.scala | 17 ++++------------- .../worker/promisehandlers/StartHandler.scala | 2 +- .../uci/ics/texera/workflow/common/Marker.scala | 5 +---- 4 files changed, 8 insertions(+), 20 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala index 53531780df5..cdc39522d00 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala @@ -103,10 +103,10 @@ private class AmberProducer( // closing the stream will release the dictionaries flightStream.takeDictionaryOwnership - if (dataHeader.marker == EndOfUpstream().getClass.getSimpleName) { + if (dataHeader.payloadType == EndOfUpstream().getClass.getSimpleName) { assert(root.getRowCount == 0) outputPort.sendTo(to, MarkerFrame(EndOfUpstream())) - } else if (dataHeader.marker == "StateFrame") { + } else if (dataHeader.payloadType == "state") { assert(root.getRowCount == 1) outputPort.sendTo(to, MarkerFrame(State().fromTuple(ArrowUtils.getTexeraTuple(0, root)))) } else { diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index 62131f8f1b5..d7b89ae9129 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -7,11 +7,7 @@ import edu.uci.ics.amber.engine.architecture.controller.promisehandlers.PortComp import edu.uci.ics.amber.engine.architecture.controller.promisehandlers.WorkerExecutionCompletedHandler.WorkerExecutionCompleted import edu.uci.ics.amber.engine.architecture.controller.promisehandlers.WorkerStateUpdatedHandler.WorkerStateUpdated import edu.uci.ics.amber.engine.architecture.logreplay.ReplayLogManager -import edu.uci.ics.amber.engine.architecture.messaginglayer.{ - InputManager, - OutputManager, - WorkerTimerService -} +import edu.uci.ics.amber.engine.architecture.messaginglayer.{InputManager, OutputManager, WorkerTimerService} import edu.uci.ics.amber.engine.architecture.worker.WorkflowWorker.MainThreadDelegateMessage import edu.uci.ics.amber.engine.architecture.worker.managers.SerializationManager import edu.uci.ics.amber.engine.architecture.worker.promisehandlers.PauseHandler.PauseWorker @@ -19,17 +15,12 @@ import edu.uci.ics.amber.engine.architecture.worker.statistics.WorkerState.{COMP import edu.uci.ics.amber.engine.architecture.worker.statistics.WorkerStatistics import edu.uci.ics.amber.engine.common.ambermessage._ import edu.uci.ics.amber.engine.common.statetransition.WorkerStateManager -import edu.uci.ics.amber.engine.common.tuple.amber.{ - FinalizeExecutor, - FinalizePort, - SchemaEnforceable, - TupleLike -} +import edu.uci.ics.amber.engine.common.tuple.amber.{FinalizeExecutor, FinalizePort, SchemaEnforceable, TupleLike} import edu.uci.ics.amber.engine.common.virtualidentity.util.{CONTROLLER, SELF} import edu.uci.ics.amber.engine.common.virtualidentity.{ActorVirtualIdentity, ChannelIdentity} import edu.uci.ics.amber.engine.common.workflow.PortIdentity import edu.uci.ics.amber.error.ErrorUtils.{mkConsoleMessage, safely} -import edu.uci.ics.texera.workflow.common.EndOfUpstream +import edu.uci.ics.texera.workflow.common.{EndOfUpstream, State} import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple @@ -102,7 +93,7 @@ class DataProcessor( outputManager.outputIterator.setTupleOutput( executor.onFinishMultiPort(portId) ) - val outputState = executor.onFinishProduceState(port) + val outputState = executor.onFinishProduceState(portId) if (outputState!= null) { outputManager.emitMarker(outputState) } diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala index 447cd4ff634..b4cd89ec2b0 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala @@ -11,7 +11,7 @@ import edu.uci.ics.amber.engine.common.rpc.AsyncRPCServer.ControlCommand import edu.uci.ics.amber.engine.common.virtualidentity.ChannelIdentity import edu.uci.ics.amber.engine.common.virtualidentity.util.SOURCE_STARTER_ACTOR import edu.uci.ics.amber.engine.common.workflow.PortIdentity -import edu.uci.ics.texera.workflow.common.EndOfUpstream +import edu.uci.ics.texera.workflow.common.{EndOfUpstream, State} object StartHandler { final case class StartWorker() extends ControlCommand[WorkerState] diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index 5d30b179115..dd26ae69a32 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -44,7 +44,4 @@ final case class State() extends Marker { override def toString: String = list.map { case (key, (_, value)) => s"$key: $value" }.mkString(", ") -} -sealed trait Marker - -final case class EndOfUpstream() extends Marker +} \ No newline at end of file From 484feda027070fc70e4e1092314cc60c0669b78f Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 22 Aug 2024 03:06:23 -0700 Subject: [PATCH 067/163] update --- .../managers/tuple_processing_manager.py | 2 +- .../architecture/packaging/input_manager.py | 46 +++++++++---------- .../architecture/packaging/output_manager.py | 16 +++++-- .../hash_based_shuffle_partitioner.py | 8 ++-- .../sendsemantics/one_to_one_partitioner.py | 6 +-- .../architecture/sendsemantics/partitioner.py | 2 +- .../sendsemantics/round_robin_partitioner.py | 6 +-- .../src/main/python/core/models/__init__.py | 2 +- .../src/main/python/core/models/marker.py | 37 ++++++++++++++- .../src/main/python/core/models/operator.py | 2 +- .../src/main/python/core/models/payload.py | 4 -- .../src/main/python/core/models/state.py | 35 -------------- .../main/python/core/runnables/main_loop.py | 3 +- .../python/core/runnables/network_receiver.py | 6 ++- .../python/core/runnables/network_sender.py | 16 ++----- .../pythonworker/PythonProxyClient.scala | 4 +- .../pythonworker/PythonProxyServer.scala | 2 +- 17 files changed, 96 insertions(+), 101 deletions(-) delete mode 100644 core/amber/src/main/python/core/models/state.py diff --git a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py index bf9b0e1e240..1febc5ee1ed 100644 --- a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py +++ b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py @@ -2,7 +2,7 @@ from typing import Optional, Union, Tuple, Iterator from core.models import InputExhausted -from core.models.state import State +from core.models.marker import State from proto.edu.uci.ics.amber.engine.common import PortIdentity diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index 52bb93c2f20..1aced125f3c 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -2,7 +2,7 @@ from core.models import Tuple, ArrowTableTupleProvider, Schema, InputExhausted from core.models.internal_marker import EndOfAll, InternalMarker, SenderChange -from core.models.marker import EndOfUpstream +from core.models.marker import EndOfUpstream, State from core.models.payload import DataFrame, DataPayload, MarkerFrame from proto.edu.uci.ics.amber.engine.common import ( ActorVirtualIdentity, @@ -96,10 +96,7 @@ def process_data_payload( self._current_channel_id = current_channel_id yield SenderChange(current_channel_id) - if isinstance(payload, StateFrame): - yield State().from_dict(payload.frame.to_pandas().iloc[0].to_dict()) - - elif isinstance(payload, DataFrame): + if isinstance(payload, DataFrame): for field_accessor in ArrowTableTupleProvider(payload.frame): yield Tuple( {name: field_accessor for name in payload.frame.column_names}, @@ -108,28 +105,31 @@ def process_data_payload( ].get_schema(), ) - elif isinstance(payload, MarkerFrame) and isinstance( - payload.frame, EndOfUpstream - ): - channel = self._channels[self._current_channel_id] - channel.complete() - port_id = channel.port_id - port_completed = all( - map( - lambda channel: channel.is_completed(), - self._ports[port_id].channels, + + + elif isinstance(payload, MarkerFrame): + if isinstance(payload.frame, State): + yield payload.frame + if isinstance(payload.frame, EndOfUpstream): + channel = self._channels[self._current_channel_id] + channel.complete() + port_id = channel.port_id + port_completed = all( + map( + lambda channel: channel.is_completed(), + self._ports[port_id].channels, + ) ) - ) - if port_completed: - yield InputExhausted() + if port_completed: + yield InputExhausted() - all_ports_completed = all( - map(lambda port: port.is_completed(), self._ports.values()) - ) + all_ports_completed = all( + map(lambda port: port.is_completed(), self._ports.values()) + ) - if all_ports_completed: - yield EndOfAll() + if all_ports_completed: + yield EndOfAll() else: raise NotImplementedError() diff --git a/core/amber/src/main/python/core/architecture/packaging/output_manager.py b/core/amber/src/main/python/core/architecture/packaging/output_manager.py index 9a072cd0c35..cc907b17f03 100644 --- a/core/amber/src/main/python/core/architecture/packaging/output_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/output_manager.py @@ -21,7 +21,7 @@ from core.architecture.sendsemantics.broad_cast_partitioner import ( BroadcastPartitioner, ) -from core.models import Tuple, Schema, MarkerFrame +from core.models import Tuple, Schema, MarkerFrame, State from core.models.marker import EndOfUpstream from core.models.payload import DataPayload, DataFrame from core.util import get_one_of @@ -101,10 +101,20 @@ def tuple_to_batch( def state_to_batch( self, state: State - ) -> Iterator[typing.Tuple[ActorVirtualIdentity, StateFrame]]: + ) -> Iterator[typing.Tuple[ActorVirtualIdentity, DataPayload]]: return chain( *( - partitioner.add_state_to_batch(state) + ( + ( + receiver, + ( + MarkerFrame(tuples) + if isinstance(tuples, State) + else self.tuple_to_frame(tuples) + ), + ) + for receiver, tuples in partitioner.add_state_to_batch(state) + ) for partitioner in self._partitioners.values() ) ) diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py index 1939bd61186..ffb21bf8e9a 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py @@ -5,7 +5,7 @@ from overrides import overrides from copy import deepcopy from core.architecture.sendsemantics.partitioner import Partitioner -from core.models import Tuple +from core.models import Tuple, State from core.models.marker import EndOfUpstream from core.util import set_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import ( @@ -46,10 +46,10 @@ def add_tuple_to_batch( def add_state_to_batch(self, state: State): for receiver, batch in self.receivers: if len(batch) > 0: - yield receiver, OutputDataFrame(frame=deepcopy(batch)) - yield receiver, OutputDataFrame(frame=deepcopy(batch)) + yield receiver, deepcopy(batch) + yield receiver, deepcopy(batch) batch.clear() - yield receiver, StateFrame(frame=state.to_table()) + yield receiver, state @overrides def no_more( diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py index 7c30af27116..5a788d89c23 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py @@ -5,7 +5,7 @@ from copy import deepcopy from core.architecture.sendsemantics.partitioner import Partitioner -from core.models import Tuple +from core.models import Tuple, State from core.models.marker import EndOfUpstream from core.util import set_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import ( @@ -37,10 +37,10 @@ def add_tuple_to_batch( @overrides def add_state_to_batch(self, state: State): if len(self.batch) > 0: - yield self.receiver, OutputDataFrame(frame=deepcopy(self.batch)) + yield self.receiver, deepcopy(self.batch) self.batch.clear() - yield self.receiver, StateFrame(frame=state.to_table()) + yield self.receiver, state @overrides def no_more( diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py index 533fd457d66..e3918c58b98 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py @@ -4,7 +4,7 @@ from betterproto import Message -from core.models import Tuple +from core.models import Tuple, State from core.models.marker import EndOfUpstream from core.util import get_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import Partitioning diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py index c71b68c6774..070f67e6fb1 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py @@ -5,7 +5,7 @@ from copy import deepcopy from core.architecture.sendsemantics.partitioner import Partitioner -from core.models import Tuple +from core.models import Tuple, State from core.models.marker import EndOfUpstream from core.util import set_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import ( @@ -40,9 +40,9 @@ def add_tuple_to_batch( def add_state_to_batch(self, state: State): for receiver, batch in self.receivers: if len(batch) > 0: - yield receiver, OutputDataFrame(frame=deepcopy(batch)) + yield receiver, deepcopy(batch) batch.clear() - yield receiver, StateFrame(frame=state.to_table()) + yield receiver, state @overrides def no_more( diff --git a/core/amber/src/main/python/core/models/__init__.py b/core/amber/src/main/python/core/models/__init__.py index f2a6131fd2d..bcb71c16977 100644 --- a/core/amber/src/main/python/core/models/__init__.py +++ b/core/amber/src/main/python/core/models/__init__.py @@ -7,7 +7,7 @@ from .table import Table, TableLike from .batch import Batch, BatchLike from .schema import AttributeType, Field, Schema -from .state import State +from .marker import State from .operator import ( Operator, TupleOperator, diff --git a/core/amber/src/main/python/core/models/marker.py b/core/amber/src/main/python/core/models/marker.py index 759190d5ed2..d04673b6378 100644 --- a/core/amber/src/main/python/core/models/marker.py +++ b/core/amber/src/main/python/core/models/marker.py @@ -1,11 +1,44 @@ from dataclasses import dataclass - +from pyarrow import Table +from pandas import DataFrame @dataclass class Marker: pass - @dataclass class EndOfUpstream(Marker): pass + +@dataclass +class State(Marker): + def __init__(self): + self.data = {} + + def add(self, key, value): + self.data[key] = value + + def get(self, key): + return self.data[key] + + def to_table(self): + return Table.from_pandas(df=DataFrame([self.data])) + + def from_dict(self, dictionary): + for key, value in dictionary.items(): + self.add(key, value) + return self + + def __setitem__(self, key, value): + self.data[key] = value + + def __getitem__(self, key): + return self.data[key] + + def __str__(self) -> str: + content = ", ".join( + [repr(key) + ": " + repr(value) for key, value in self.data.items()] + ) + return f"State[{content}]" + + __repr__ = __str__ \ No newline at end of file diff --git a/core/amber/src/main/python/core/models/operator.py b/core/amber/src/main/python/core/models/operator.py index 603cd3e4517..5f63f49e807 100644 --- a/core/amber/src/main/python/core/models/operator.py +++ b/core/amber/src/main/python/core/models/operator.py @@ -8,7 +8,7 @@ from deprecated import deprecated from . import InputExhausted, Table, TableLike, Tuple, TupleLike, Batch, BatchLike -from .state import State +from .marker import State from .table import all_output_to_tuple diff --git a/core/amber/src/main/python/core/models/payload.py b/core/amber/src/main/python/core/models/payload.py index 03cd1696626..48ae73ee818 100644 --- a/core/amber/src/main/python/core/models/payload.py +++ b/core/amber/src/main/python/core/models/payload.py @@ -11,10 +11,6 @@ class DataPayload: class DataFrame(DataPayload): frame: Table -@dataclass -class StateFrame(DataPayload): - frame: Table - @dataclass class MarkerFrame(DataPayload): frame: Marker diff --git a/core/amber/src/main/python/core/models/state.py b/core/amber/src/main/python/core/models/state.py deleted file mode 100644 index 69dc4b73e49..00000000000 --- a/core/amber/src/main/python/core/models/state.py +++ /dev/null @@ -1,35 +0,0 @@ -from pyarrow import Table -from pandas import DataFrame - - -class State: - def __init__(self): - self.data = {} - - def add(self, key, value): - self.data[key] = value - - def get(self, key): - return self.data[key] - - def to_table(self): - return Table.from_pandas(df=DataFrame([self.data])) - - def from_dict(self, dictionary): - for key, value in dictionary.items(): - self.add(key, value) - return self - - def __setitem__(self, key, value): - self.data[key] = value - - def __getitem__(self, key): - return self.data[key] - - def __str__(self) -> str: - content = ", ".join( - [repr(key) + ": " + repr(value) for key, value in self.data.items()] - ) - return f"State[{content}]" - - __repr__ = __str__ diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index 32abf123583..c9de31d599c 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -20,8 +20,7 @@ Tuple, ) from core.models.internal_queue import DataElement, ControlElement -from core.models.payload import OutputDataFrame -from core.models.state import State +from core.models.marker import State from core.runnables.data_processor import DataProcessor from core.util import StoppableQueueBlockingRunnable, get_one_of, set_one_of from core.util.customized_queue.queue_base import QueueElement diff --git a/core/amber/src/main/python/core/runnables/network_receiver.py b/core/amber/src/main/python/core/runnables/network_receiver.py index b66af3bc9ef..686223eeacc 100644 --- a/core/amber/src/main/python/core/runnables/network_receiver.py +++ b/core/amber/src/main/python/core/runnables/network_receiver.py @@ -17,7 +17,7 @@ MarkerFrame, ) from core.models.internal_queue import DataElement, ControlElement, InternalQueue -from core.models.marker import EndOfUpstream +from core.models.marker import EndOfUpstream, State from core.proxy import ProxyServer from core.util import Stoppable, get_one_of from core.util.runnable.runnable import Runnable @@ -64,8 +64,10 @@ def data_handler(command: bytes, table: Table) -> int: """ data_header = PythonDataHeader().parse(command) payload_type = data_header.payload_type - if payload_type == "data": + if payload_type == "Data": payload = DataFrame(table) + elif payload_type == "State": + payload = MarkerFrame(State().from_dict(table.to_pandas().iloc[0].to_dict())) elif payload_type == "EndOfUpstream": payload = MarkerFrame(EndOfUpstream()) shared_queue.put( diff --git a/core/amber/src/main/python/core/runnables/network_sender.py b/core/amber/src/main/python/core/runnables/network_sender.py index 43f1997f8a1..faffd40bcf5 100644 --- a/core/amber/src/main/python/core/runnables/network_sender.py +++ b/core/amber/src/main/python/core/runnables/network_sender.py @@ -3,10 +3,9 @@ from loguru import logger from overrides import overrides -from core.models import DataPayload, InternalQueue, DataFrame, MarkerFrame +from core.models import DataPayload, InternalQueue, DataFrame, MarkerFrame, State from core.models.internal_queue import InternalQueueElement, DataElement, ControlElement -from core.models.payload import StateFrame from core.proxy import ProxyClient from core.util import StoppableQueueBlockingRunnable from proto.edu.uci.ics.amber.engine.common import ( @@ -55,24 +54,15 @@ def _send_data(self, to: ActorVirtualIdentity, data_payload: DataPayload) -> Non """ if isinstance(data_payload, DataFrame): - data_header = PythonDataHeader(tag=to, payload_type="data") + data_header = PythonDataHeader(tag=to, payload_type="Data") self._proxy_client.send_data(bytes(data_header), data_payload.frame) elif isinstance(data_payload, MarkerFrame): data_header = PythonDataHeader( tag=to, payload_type=data_payload.frame.__class__.__name__ ) - self._proxy_client.send_data(bytes(data_header), None) - - elif isinstance(data_payload, EndOfUpstream): - data_header = PythonDataHeader(tag=to, marker=EndOfUpstream.__name__) - self._proxy_client.send_data(bytes(data_header), None) - - elif isinstance(data_payload, StateFrame): - data_header = PythonDataHeader(tag=to, marker=StateFrame.__name__) - table = data_payload.frame + table = data_payload.frame.to_table() if isinstance(data_payload.frame, State) else None self._proxy_client.send_data(bytes(data_header), table) - else: raise TypeError(f"Unexpected payload {data_payload}") diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala index b373666c412..935085a1c2c 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala @@ -102,10 +102,10 @@ class PythonProxyClient(portNumberPromise: Promise[Int], val actorId: ActorVirtu def sendData(dataPayload: DataPayload, from: ActorVirtualIdentity): Unit = { dataPayload match { - case DataFrame(frame) => writeArrowStream(mutable.Queue(frame: _*), from, "data") + case DataFrame(frame) => writeArrowStream(mutable.Queue(frame: _*), from, "Data") case MarkerFrame(marker) => marker match { - case state: State => writeArrowStream(mutable.Queue(state.toTuple), from, "state") + case state: State => writeArrowStream(mutable.Queue(state.toTuple), from, marker.getClass.getSimpleName) case _ => writeArrowStream(mutable.Queue.empty, from, marker.getClass.getSimpleName) } } diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala index cdc39522d00..2b5eb3863ab 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala @@ -106,7 +106,7 @@ private class AmberProducer( if (dataHeader.payloadType == EndOfUpstream().getClass.getSimpleName) { assert(root.getRowCount == 0) outputPort.sendTo(to, MarkerFrame(EndOfUpstream())) - } else if (dataHeader.payloadType == "state") { + } else if (dataHeader.payloadType == State().getClass.getSimpleName) { assert(root.getRowCount == 1) outputPort.sendTo(to, MarkerFrame(State().fromTuple(ArrowUtils.getTexeraTuple(0, root)))) } else { From eb60922f26b596bf1e0b1d72c35c9c5ada248344 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Thu, 22 Aug 2024 15:10:56 -0700 Subject: [PATCH 068/163] init --- .../operators/state/DataToStateOpDesc.scala | 46 -------------- .../operators/state/DataToStateOpExec.scala | 28 --------- .../state/DualInputStateReceiverOpDesc.scala | 42 ------------- .../state/DualInputStateReceiverOpExec.scala | 26 -------- .../operators/state/StateReceiverOpDesc.scala | 42 ------------- .../operators/state/StateReceiverOpExec.scala | 18 ------ .../operators/state/StateToDataOpDesc.scala | 62 ------------------- .../operators/state/StateToDataOpExec.scala | 32 ---------- 8 files changed, 296 deletions(-) delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpDesc.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpExec.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpDesc.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpExec.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala deleted file mode 100644 index c8733f410ef..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala +++ /dev/null @@ -1,46 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import edu.uci.ics.amber.engine.architecture.deploysemantics.{PhysicalOp, SchemaPropagationFunc} -import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo -import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} -import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort, PortIdentity} -import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} -import edu.uci.ics.texera.workflow.common.operators.LogicalOp -import edu.uci.ics.texera.workflow.common.tuple.schema.Schema - -class DataToStateOpDesc extends LogicalOp { - override def getPhysicalOp( - workflowId: WorkflowIdentity, - executionId: ExecutionIdentity - ): PhysicalOp = { - PhysicalOp - .oneToOnePhysicalOp( - workflowId, - executionId, - operatorIdentifier, - OpExecInitInfo((_, _) => { - new DataToStateOpExec() - }) - ) - .withInputPorts(operatorInfo.inputPorts) - .withOutputPorts(operatorInfo.outputPorts) - .withPropagateSchema( - SchemaPropagationFunc(inputSchemas => Map(PortIdentity() -> inputSchemas(PortIdentity(1)))) - ) - .withSuggestedWorkerNum(1) - } - - override def operatorInfo: OperatorInfo = - OperatorInfo( - "Data To State", - "Convert Data to State", - OperatorGroupConstants.UTILITY_GROUP, - inputPorts = List( - InputPort(PortIdentity(), "State"), - InputPort(PortIdentity(1), "Data", dependencies = List(PortIdentity())) - ), - outputPorts = List(OutputPort()) - ) - - override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(1) -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala deleted file mode 100644 index 0ae7ba89e59..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala +++ /dev/null @@ -1,28 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike -import edu.uci.ics.texera.workflow.common.State -import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor -import edu.uci.ics.texera.workflow.common.tuple.Tuple - -import scala.collection.mutable - -class DataToStateOpExec extends OperatorExecutor { - private val buffer = new mutable.ArrayBuffer[Tuple]() - private var stateTuple: Tuple = _ - - override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { - port match { - case 0 => - if (stateTuple == null) - stateTuple = tuple - case 1 => - buffer += tuple - } - Iterator() - } - - override def onFinishProduceState(port: Int): State = State().fromTuple(stateTuple) - - override def onFinish(port: Int): Iterator[TupleLike] = buffer.iterator -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpDesc.scala deleted file mode 100644 index dfa5bc434a8..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpDesc.scala +++ /dev/null @@ -1,42 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp -import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo -import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} -import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort, PortIdentity} -import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} -import edu.uci.ics.texera.workflow.common.operators.LogicalOp -import edu.uci.ics.texera.workflow.common.tuple.schema.Schema - -class DualInputStateReceiverOpDesc extends LogicalOp { - - override def getPhysicalOp( - workflowId: WorkflowIdentity, - executionId: ExecutionIdentity - ): PhysicalOp = { - PhysicalOp - .oneToOnePhysicalOp( - workflowId, - executionId, - operatorIdentifier, - OpExecInitInfo((_, _) => { - new DualInputStateReceiverOpExec() - }) - ) - .withInputPorts(operatorInfo.inputPorts) - .withOutputPorts(operatorInfo.outputPorts) - .withParallelizable(false) - } - - override def operatorInfo: OperatorInfo = - OperatorInfo( - "2 in Testing State Receiver", - "", - OperatorGroupConstants.UTILITY_GROUP, - inputPorts = List(InputPort(PortIdentity(0)), InputPort(PortIdentity(1))), - outputPorts = List(OutputPort()), - supportReconfiguration = true - ) - - override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(0) -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpExec.scala deleted file mode 100644 index ea6ad205f4b..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpExec.scala +++ /dev/null @@ -1,26 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike -import edu.uci.ics.amber.engine.common.workflow.PortIdentity -import edu.uci.ics.texera.workflow.common.State -import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor -import edu.uci.ics.texera.workflow.common.tuple.Tuple - -class DualInputStateReceiverOpExec extends OperatorExecutor { - - override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { - Iterator(tuple) - } - - override def processTupleMultiPort( - tuple: Tuple, - port: Int - ): Iterator[(TupleLike, Option[PortIdentity])] = { - processTuple(tuple, port).map(t => (t, None)) - } - - override def processState(state: State, port: Int): State = { - println(port, state) - state - } -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpDesc.scala deleted file mode 100644 index 346084072fc..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpDesc.scala +++ /dev/null @@ -1,42 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp -import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo -import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} -import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort} -import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} -import edu.uci.ics.texera.workflow.common.operators.LogicalOp -import edu.uci.ics.texera.workflow.common.tuple.schema.Schema - -class StateReceiverOpDesc extends LogicalOp { - - override def getPhysicalOp( - workflowId: WorkflowIdentity, - executionId: ExecutionIdentity - ): PhysicalOp = { - PhysicalOp - .oneToOnePhysicalOp( - workflowId, - executionId, - operatorIdentifier, - OpExecInitInfo((_, _) => { - new StateReceiverOpExec() - }) - ) - .withInputPorts(operatorInfo.inputPorts) - .withOutputPorts(operatorInfo.outputPorts) - .withParallelizable(false) - } - - override def operatorInfo: OperatorInfo = - OperatorInfo( - "Testing State Receiver", - "", - OperatorGroupConstants.UTILITY_GROUP, - inputPorts = List(InputPort()), - outputPorts = List(OutputPort()), - supportReconfiguration = true - ) - - override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(0) -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpExec.scala deleted file mode 100644 index 41af2291e6a..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpExec.scala +++ /dev/null @@ -1,18 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike -import edu.uci.ics.texera.workflow.common.State -import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor -import edu.uci.ics.texera.workflow.common.tuple.Tuple - -class StateReceiverOpExec extends OperatorExecutor { - - override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { - Iterator(tuple) - } - - override def processState(state: State, port: Int): State = { - println(state) - state - } -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala deleted file mode 100644 index 053dafe92c1..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala +++ /dev/null @@ -1,62 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription} -import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle -import edu.uci.ics.amber.engine.architecture.deploysemantics.{PhysicalOp, SchemaPropagationFunc} -import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo -import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} -import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort, PortIdentity} -import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} -import edu.uci.ics.texera.workflow.common.operators.LogicalOp -import edu.uci.ics.texera.workflow.common.tuple.schema.{Attribute, Schema} - -class StateToDataOpDesc extends LogicalOp { - @JsonProperty - @JsonSchemaTitle("State output column(s)") - @JsonPropertyDescription( - "Name of the newly added output columns that the UDF will produce, if any" - ) - var outputColumns: List[Attribute] = List() - - override def getPhysicalOp( - workflowId: WorkflowIdentity, - executionId: ExecutionIdentity - ): PhysicalOp = { - PhysicalOp - .oneToOnePhysicalOp( - workflowId, - executionId, - operatorIdentifier, - OpExecInitInfo((_, _) => { - new StateToDataOpExec() - }) - ) - .withInputPorts(operatorInfo.inputPorts) - .withOutputPorts(operatorInfo.outputPorts) - .withPropagateSchema( - SchemaPropagationFunc(inputSchemas => - getOutputSchemas( - operatorInfo.inputPorts.map(port => inputSchemas(port.id)).toArray - ).zipWithIndex.map { - case (schema, index) => PortIdentity(index) -> schema - }.toMap - ) - ) - .withSuggestedWorkerNum(1) - } - - override def operatorInfo: OperatorInfo = - OperatorInfo( - "State To Data", - "Convert State to Data", - OperatorGroupConstants.UTILITY_GROUP, - inputPorts = List(InputPort()), - outputPorts = List(OutputPort(PortIdentity(), "State"), OutputPort(PortIdentity(1), "Data")) - ) - - override def getOutputSchema(schemas: Array[Schema]): Schema = throw new NotImplementedError() - - override def getOutputSchemas(schemas: Array[Schema]): Array[Schema] = - Array(Schema.builder().add(outputColumns).build(), schemas(0)) - -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala deleted file mode 100644 index 692309a9468..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala +++ /dev/null @@ -1,32 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike -import edu.uci.ics.amber.engine.common.workflow.PortIdentity -import edu.uci.ics.texera.workflow.common.State -import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor -import edu.uci.ics.texera.workflow.common.tuple.Tuple - -class StateToDataOpExec extends OperatorExecutor { - private var stateTuple: Tuple = _ - - override def processState(state: State, port: Int): State = { - if (state.size > 0) - stateTuple = state.toTuple - State() - } - - override def processTupleMultiPort( - tuple: Tuple, - port: Int - ): Iterator[(TupleLike, Option[PortIdentity])] = { - if (stateTuple != null) { - val outputTuple = stateTuple - stateTuple = null - Array((outputTuple, Some(PortIdentity())), (tuple, Some(PortIdentity(1)))).iterator - } else { - Iterator((tuple, Some(PortIdentity(1)))) - } - } - - override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = throw new NotImplementedError() -} From 25468fab2806f40603a410759a067c9759fb8b2c Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Thu, 22 Aug 2024 15:17:44 -0700 Subject: [PATCH 069/163] update --- .../hashJoin/HashJoinBuildOpExec.scala | 19 ++++------ .../operators/hashJoin/HashJoinOpDesc.scala | 30 +++++++++++---- .../hashJoin/HashJoinProbeOpExec.scala | 38 ++++++++++--------- 3 files changed, 50 insertions(+), 37 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala index 3c27644d060..e1042b60fa7 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala @@ -1,37 +1,34 @@ package edu.uci.ics.texera.workflow.operators.hashJoin import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike -import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple -import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType import scala.collection.mutable import scala.collection.mutable.ListBuffer class HashJoinBuildOpExec[K](buildAttributeName: String) extends OperatorExecutor { - var buildTableHashMap: mutable.HashMap[K, (ListBuffer[Tuple], Boolean)] = _ + var buildTableHashMap: mutable.HashMap[K, ListBuffer[Tuple]] = _ override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { val key = tuple.getField(buildAttributeName).asInstanceOf[K] - buildTableHashMap.getOrElseUpdate(key, (new ListBuffer[Tuple](), false))._1 += tuple + buildTableHashMap.getOrElseUpdate(key, new ListBuffer[Tuple]()) += tuple Iterator() } - - override def onFinishProduceState(prot: Int): State = { - val state = State() - state.add("hashtable", AttributeType.ANY, buildTableHashMap) - state + override def onFinish(port: Int): Iterator[TupleLike] = { + buildTableHashMap.iterator.flatMap { + case (k, v) => v.map(t => TupleLike(List(k) ++ t.getFields)) + } } override def open(): Unit = { - buildTableHashMap = new mutable.HashMap[K, (mutable.ListBuffer[Tuple], Boolean)]() + buildTableHashMap = new mutable.HashMap[K, mutable.ListBuffer[Tuple]]() } override def close(): Unit = { buildTableHashMap.clear() } -} +} \ No newline at end of file diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala index 1c8a938b4cd..345dcecb03c 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala @@ -1,17 +1,29 @@ package edu.uci.ics.texera.workflow.operators.hashJoin +import edu.uci.ics.texera.workflow.operators.hashJoin.HashJoinOpDesc.HASH_JOIN_INTERNAL_KEY_NAME import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription} import com.kjetland.jackson.jsonSchema.annotations.{JsonSchemaInject, JsonSchemaTitle} -import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo import edu.uci.ics.amber.engine.architecture.deploysemantics.{PhysicalOp, SchemaPropagationFunc} -import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, PhysicalOpIdentity, WorkflowIdentity} +import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo +import edu.uci.ics.amber.engine.common.virtualidentity.{ + ExecutionIdentity, + PhysicalOpIdentity, + WorkflowIdentity +} import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort, PhysicalLink, PortIdentity} -import edu.uci.ics.texera.workflow.common.metadata.annotations.{AutofillAttributeName, AutofillAttributeNameOnPort1} +import edu.uci.ics.texera.workflow.common.metadata.annotations.{ + AutofillAttributeName, + AutofillAttributeNameOnPort1 +} import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} import edu.uci.ics.texera.workflow.common.operators.LogicalOp -import edu.uci.ics.texera.workflow.common.tuple.schema.{Attribute, Schema} +import edu.uci.ics.texera.workflow.common.tuple.schema.{Attribute, AttributeType, Schema} import edu.uci.ics.texera.workflow.common.workflow.{HashPartition, OneToOnePartition, PhysicalPlan} +object HashJoinOpDesc { + val HASH_JOIN_INTERNAL_KEY_NAME = "__internal__hashtable__key__" +} + @JsonSchemaInject(json = """ { "attributeTypeRules": { @@ -43,9 +55,9 @@ class HashJoinOpDesc[K] extends LogicalOp { var joinType: JoinType = JoinType.INNER override def getPhysicalPlan( - workflowId: WorkflowIdentity, - executionId: ExecutionIdentity - ): PhysicalPlan = { + workflowId: WorkflowIdentity, + executionId: ExecutionIdentity + ): PhysicalPlan = { val buildInputPort = operatorInfo.inputPorts.head val buildOutputPort = OutputPort(PortIdentity(0, internal = true), blocking = true) @@ -66,6 +78,7 @@ class HashJoinOpDesc[K] extends LogicalOp { Map( PortIdentity(internal = true) -> Schema .builder() + .add(HASH_JOIN_INTERNAL_KEY_NAME, AttributeType.ANY) .add(inputSchemas(operatorInfo.inputPorts.head.id)) .build() ) @@ -144,6 +157,7 @@ class HashJoinOpDesc[K] extends LogicalOp { val probeSchema = schemas(1) val builder = Schema.builder() builder.add(buildSchema) + builder.removeIfExists(HASH_JOIN_INTERNAL_KEY_NAME) val leftAttributeNames = buildSchema.getAttributeNames val rightAttributeNames = probeSchema.getAttributeNames.filterNot(name => name == probeAttributeName) @@ -163,4 +177,4 @@ class HashJoinOpDesc[K] extends LogicalOp { } builder.build() } -} +} \ No newline at end of file diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala index 27ea877dedb..fbb73d8edc8 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala @@ -1,18 +1,19 @@ package edu.uci.ics.texera.workflow.operators.hashJoin import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike -import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple +import edu.uci.ics.texera.workflow.operators.hashJoin.HashJoinOpDesc.HASH_JOIN_INTERNAL_KEY_NAME + import scala.collection.mutable import scala.collection.mutable.ListBuffer object JoinUtils { def joinTuples( - leftTuple: Tuple, - rightTuple: Tuple, - skipAttributeName: Option[String] = None - ): TupleLike = { + leftTuple: Tuple, + rightTuple: Tuple, + skipAttributeName: Option[String] = None + ): TupleLike = { val leftAttributeNames = leftTuple.getSchema.getAttributeNames val rightAttributeNames = rightTuple.getSchema.getAttributeNames.filterNot(name => skipAttributeName.isDefined && name == skipAttributeName.get @@ -40,20 +41,21 @@ object JoinUtils { } } class HashJoinProbeOpExec[K]( - probeAttributeName: String, - joinType: JoinType -) extends OperatorExecutor { + probeAttributeName: String, + joinType: JoinType + ) extends OperatorExecutor { + var currentTuple: Tuple = _ var buildTableHashMap: mutable.HashMap[K, (ListBuffer[Tuple], Boolean)] = _ - override def processState(state: State, port: Int): State = { - buildTableHashMap = state("hashtable").asInstanceOf[mutable.HashMap[K, (mutable.ListBuffer[Tuple], Boolean)]] - State() - } - - override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = if (port == 0) { + // Load build hash map + val key = tuple.getField[K](HASH_JOIN_INTERNAL_KEY_NAME) + buildTableHashMap.getOrElseUpdate(key, (new ListBuffer[Tuple](), false))._1 += tuple + .getPartialTuple( + tuple.getSchema.getAttributeNames.filterNot(n => n == HASH_JOIN_INTERNAL_KEY_NAME) + ) Iterator.empty } else { // Probe phase @@ -98,9 +100,9 @@ class HashJoinProbeOpExec[K]( } private def performJoin( - probeTuple: Tuple, - matchedTuples: ListBuffer[Tuple] - ): Iterator[TupleLike] = { + probeTuple: Tuple, + matchedTuples: ListBuffer[Tuple] + ): Iterator[TupleLike] = { matchedTuples.iterator.map { buildTuple => JoinUtils.joinTuples(buildTuple, probeTuple, skipAttributeName = Some(probeAttributeName)) } @@ -122,4 +124,4 @@ class HashJoinProbeOpExec[K]( buildTableHashMap.clear() } -} +} \ No newline at end of file From 995b6d57a24a9185898978b5c297d95345aac529 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Thu, 22 Aug 2024 15:19:11 -0700 Subject: [PATCH 070/163] update --- core/amber/src/main/scala/edu/uci/ics/texera/web/JsonTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/JsonTest.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/JsonTest.scala index b0a19fa4418..64cd4247cc7 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/JsonTest.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/JsonTest.scala @@ -19,4 +19,4 @@ object JsonTest { } } -class JsonTest {} \ No newline at end of file +class JsonTest {} From 92b3c952bd4b9b2be32363e09d358a2e1b9b6e24 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Thu, 22 Aug 2024 15:28:30 -0700 Subject: [PATCH 071/163] update --- .../operators/hashJoin/HashJoinOpDesc.scala | 6 +++--- .../hashJoin/HashJoinProbeOpExec.scala | 20 +++++++++--------- .../assets/operator_images/DataToState.png | Bin 19569 -> 0 bytes .../assets/operator_images/StateToData.png | Bin 24997 -> 0 bytes 4 files changed, 13 insertions(+), 13 deletions(-) delete mode 100644 core/gui/src/assets/operator_images/DataToState.png delete mode 100644 core/gui/src/assets/operator_images/StateToData.png diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala index 345dcecb03c..51dd37ac962 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala @@ -55,9 +55,9 @@ class HashJoinOpDesc[K] extends LogicalOp { var joinType: JoinType = JoinType.INNER override def getPhysicalPlan( - workflowId: WorkflowIdentity, - executionId: ExecutionIdentity - ): PhysicalPlan = { + workflowId: WorkflowIdentity, + executionId: ExecutionIdentity + ): PhysicalPlan = { val buildInputPort = operatorInfo.inputPorts.head val buildOutputPort = OutputPort(PortIdentity(0, internal = true), blocking = true) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala index fbb73d8edc8..8fa1f9efd6b 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala @@ -10,10 +10,10 @@ import scala.collection.mutable.ListBuffer object JoinUtils { def joinTuples( - leftTuple: Tuple, - rightTuple: Tuple, - skipAttributeName: Option[String] = None - ): TupleLike = { + leftTuple: Tuple, + rightTuple: Tuple, + skipAttributeName: Option[String] = None + ): TupleLike = { val leftAttributeNames = leftTuple.getSchema.getAttributeNames val rightAttributeNames = rightTuple.getSchema.getAttributeNames.filterNot(name => skipAttributeName.isDefined && name == skipAttributeName.get @@ -41,9 +41,9 @@ object JoinUtils { } } class HashJoinProbeOpExec[K]( - probeAttributeName: String, - joinType: JoinType - ) extends OperatorExecutor { + probeAttributeName: String, + joinType: JoinType +) extends OperatorExecutor { var currentTuple: Tuple = _ var buildTableHashMap: mutable.HashMap[K, (ListBuffer[Tuple], Boolean)] = _ @@ -100,9 +100,9 @@ class HashJoinProbeOpExec[K]( } private def performJoin( - probeTuple: Tuple, - matchedTuples: ListBuffer[Tuple] - ): Iterator[TupleLike] = { + probeTuple: Tuple, + matchedTuples: ListBuffer[Tuple] + ): Iterator[TupleLike] = { matchedTuples.iterator.map { buildTuple => JoinUtils.joinTuples(buildTuple, probeTuple, skipAttributeName = Some(probeAttributeName)) } diff --git a/core/gui/src/assets/operator_images/DataToState.png b/core/gui/src/assets/operator_images/DataToState.png deleted file mode 100644 index 27419e8a612fb2d5b08a302c1f3eada0283d6924..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 19569 zcmXV%1yEdFkcA^S6I=#&2|hRk3GVI$cXx;2?(P~qxI=JvcY?bGcis2zE>*}>!7%gQ zz1^q3b8eWttk@R>T!arFK75f77gqf6;iC}nb3Ysm@O7dh)&=sCe8uMjS2*6LmW{{sRd)i9hmG zaVVU-?{_o!j|{^}?_T#VJtoVew|qGldv_-HE~dpVcN}$fbq@K{)P_qF*t%Xd51ZWq zUm_wRFa-nzf~%{mNgFgPluDIhywQ;g70uz0kQRah1MN+Sh#B4j~(5E@&8?2%_Lns zTk2S)UFLGNJ4{t`c2u16W;=>SzhIBY431>hADBY&xIb%uzStn~k!KuhH zhx>R(nGCl4LehNE`99B=lc3bPd1oD(;_n{J&aBtvTLBLbzgBG} zw<6*0vp~8>p($9YJOtuym(HIy!sl`*U?UpSV-lu*kPA-IYIUL4s#G2U!30R>Rr5aT zO_f-QjKq?b({-ct5ciC~99;3^zL|nxgrHc0*%eC|6^|k!xIX7Ep0qHrYX6IyV$!6Q z;<>;B3knXzfuaN}shu4|y7g8=aD?!_Vyjk$>h@)6O_VH0kaad-vF^3(^IU?2goJD_ zyZ`?w(FhfEB9sIC?C-u{c%pdvBY%Hplh6u-w3$5h=rQ#%8Jdx8?e&B~cFXgm3l%T( zbdbWzid4!Xb90IJahSm-dIM9U82;q63=I1zZD7t|WoDcc79*yyuE@N0aW)AYQ1E0= zBuPNG9!e^t9`^05oMZKS4HnP!7$Y-qt-S`oHfuFMR}byaDPHdHH(HAMd4e6&Ov(*du?LaHIGf7Q;}@9s(iKFP9;gbtaQ%T<@b?uQ2Ughxsv- z-fi)^doKR|bKEu`CrB<))*z}=IBqLU7Kn&tUkLe}9AB=pe2(Y#S`yFMtuu$J44B$J zb5k$=NUHXJEyj)74;3s7T;2*i|M$&_lenS1jLo|zFsUF+3k|zLt6QRwnm#Bc6fD4y zJv}`gQaCY4!2Nu`K@exUJLAcZoUcjbcXfSzBJ~;^HoSIaG09~9{jz(SzJPffIHXc{VPryPCBu3wKNV! z-))nLiHSw-!^1;EN|l!Ruoscc$xow)fQOuRVIYPQ-=}Z60amaeZ^Cl>Dj?j z>z(|f#j{^m`uhpCKdlN^q_b5`Q&P1)E3>2XKN|H1#!!KF_kWTYs5u-=X4W_Dm5}Q#cl90<{cc?dH_SXZk#3l)rdBbm)e}7x&iGPuj zZOM24gl7G`8l+lQF@4^!*my1{X%QHKL-g`NTr7W`cyw%J#_4p){xh)H-Jq&if*&6r zzv#H{CE?&4iLoLS#v@ap27Qb#nY^G2M`-xHpimKWAprV#)?x3kKbCgz$K~Ijt=8L< z)|oSqR2Q+o%fFSD+g}5EaF;ZO>hkjPRl4tQ5wkO)VkNPo&E7rC-$kgtFiZ$3e^m)S zY_zsd=-LzFXmA1bH8iBZ$H$=D~7?BhP?K{Vh zpx}&4Ttm0#yMIrU{O>QvYu{X)l&LSEm=>jmRIoy$N0s84XxzeA$;*tmczJn`;S{8$ z$3CKA^_(wI$jgZ`vN(t;NUlT8)M>RmE5&eSw6(Pbr+)Mrj;FX>E*Op=5d(Xc3i8GC z^e|&0WXd~z^D>EjX?4FgQI8?yy^B`?1^>82FjNOVlH4#Z;PM}dkt{RZ1kYi0jJ32@ z<;-lZ1!VW0#xqbK0{v>(y|bY%`=EPOGCA#kxi;CXC8JkW|GnlCYdv^eAzOL70|DMn5tr`#Z zp|FFaBbVWjdzp=u{|X|AWE&S3_trpM4kGiR4uK7OwBYKx=T+8?%7$bcO> zbyv2$j*d=1YHF%bUqXVHLl0`)hz2RoWA)GHyBbCNY7viizTJn*9Y*K#^`DHFnvp1X zjLu%Rz^=&p=DOag*X_?3Hmx9$RZNk^@56`n{C(QbOj913OTx+L-`^&F-yMnmps?Yz zc#u;=sHsB0IeL6~eYCh490`8DPzVxiGJ`g^5Di1J6Yy3z&|S#pDjkZ&!pjEMQBwHau%-tg+NOe4bA9uvQV*8uuWbF+%-0QKCcH+uz&+3Ci;Jr!0Om`&KOUYJifi3x!a!h?IMP zy_WC(uVK+d?{O+mNQ;MnAbGp;h7ES6nJyHv!zb~lK&4c%P;We)O{k}*$D#RqPrdnU z^4W4-v4pCsYNBj0?J|*?+fZ%cH@k_6PIl{+0yeX$&10bDdtM5Ff*o^Jr<0zZ&pZG2 z()MnZgb4B?Y}KnNqS>5-wo)$~WV5*Eg}`F0d8kN}V$^vd1~CQOo3ax9VF(Ass%2c$ zOsmzturaL5O|~1~=7fs=BKROnxH}7mHMuHt76WBDvE%2n4fZ|}dP2TKi|NBjRp%X; z+v)H*wFRnbVJ?EO#<9t?+nuEk?|7zgw1dd!rG^Eb+3WRoaW+g0BcNEluj(CyF^RIN zb=mHEzWyzp)jZzuVzWE=`TpFNkq6e^qxsvrSvQ5~GchDOxZU%9;Ia&imif)6 zTwK;7BO|F5TL_tS-fTeTFVzIe_8kRjV^Q;CqoX?uTc7-#%_tG0sO((dNW8%zp^Nos zRctc0Ow{r5@g6}?u)6yk`Y7HHmxIVA6Y0xJCQKttDdfX?XKTv}*1smC(^!oa-P|5D zyp>>LMk7fGSKg1ys|3gJvje@ zN%?vlXYjsgQEW6gF8)oTQQ_`~g=Gb!uOkZhC6)_ zmIY37yX7)s;&Y67g;oo5vRkk>=)@ zP|dAPR}fOCB!$mN1_{Py)c)=K@^C5o&%j_ptweqczmta85B+KsY-6#;tf=QWjX;KS&rb*6vmtmi}~L!8X6kbeQNe!=~TE(Km@%j*LB~t zSFJ$(Lt&_5*)O+?X@8c*>hM^-pN@W|vsq?us=;EA6U+g18g#5~yM9GE!2Oi1+xI2!vu9Fnx+ibq&nZuW8R& zYf0IF;6o*33hv=yiZ$+2DODbJcuP@wj+rek6BQQbnK>M$nthP@^xqnaPK6i_{8bml zo#v*7(N|ANNKpF5&%aTk2t9ATQg7ZR7rupI82Yst<_m898*tfUx=~0YP5dc#{2GiU zK)8-ebzBO4TTZ&$JtQjXBRzj3^ZIY9uU_nQVk?e58j_DO)?|h$uaH667o9jbOe~Tl z@nj|r2h;zke+WYaq42%l%!YNoJy{OVIqzS^<4PR%yfe)Gak%~cRP*Q7|770tl)6*m z`*t;?&E=ow?8nbAbj$VTN-4Bzm2)l)HPscr*Fy#*wZ4yGf*qTmUaa~JPOW+Pb;Xgl zApO@A2HR_^mWvj9I_&`^vwz~6 zK;OS^mT&Lq?7{g-W>nFfhCNt)uxhb>#;D$G{%Y0V{rcJAqc^Ci7ZDzQc&}44YMMHw zbj~8JvP2#WeUaoJplQ~tbpFMGyq&-6S6RpXTWT>pt$0}99<8ncYSzq#P_CY7nlWU*Uo5`p#uQR|%&`m*$BKG|@83f>T4tiSn1>kMdm3HLcnY)8qJ zR>r&SekshgS8fegp^4U#C$jl#^GA+iSf)(~>s6i*|1ZJh#I|pD4R4>ZHd-7xHrEZU z>mbsr@tkj#prVPfInMTw%x*h0L7}k4iEZcFwLh-+CwMelTU(mm(=L&eO2kyj5OW{J zLBG!%>c1CHe%h$Iid8Rz8PuB(A0x?da530bs&{5@SiqISmCCV{dMKtSj#WBeI_)kE zN>idA(UEzTTYPov(LPHM3*<0DLPgsJRn^tW|2CMOI|_w-(XE(0YIc2t`oxacdpu7x zEcTHF7sN~;!3xux(Yfj5u1pIR^Jh zy8+02;KzrZ7~QDwaLI!JFsSO2@pW;I_wlgLtt-X^YcPgWP#$!%&-K9)gT4c*WJX`N9uym((}!wSbPT{ge}fTAAD%;3 zzWQaSFG6W)LRQap-6FsVxS zx-#DpUp70a_|iX!eUL-3`cJCSVxGnjkXGkBH&a3%*@{Un5k@;-@BW^gt+w=rEWwN~ z#;;Or6kg#XO{TG!63$vQJOK>jI0~2TDkD!)Hm#@3VQ<9LVy;Aiumh>|U3^_`IHXr}yV;t%D&;!HJMWe?D)|x0lC1EiEm_ z23cp9CX+k~V^w$Nnd~hmK*z~eXt0=<^m%`Eh2jxj@i8mCQva>lJRO5zESu~db89^O ztEPTVPSiWb(J5)j$5>K;{Pr7-%>u4p-;pW&Rhgn*PNQ$dXgr&JHPki+9-gONZ}6A% z#Y*kJRXGCeVnF#Qghs@QY}m!wel#leqQ=P>K4AQEFoONqmLqq_FT9pKI*~Q%ezVe` zWn4Id%==>08|+qCF1N#K2$$`)9p;oIse(Nlmh;WEGFrJGHH5cW#GFnAORBReR2V-< zeYG=aqZLSK?i=vXYVd{alXwku|$Gnj`J7 z$5ijH-h4?NwV0Mcl;b0&m+O2M+p~QBiXXoy z(Q67nof?6WHe`|wfR0b5*9h3=Fkp!-=IG<|x`G zpzdEH5$j$j6-|wLI2_7Lbz>m0uM3zClX$kvSG}o&csv~u$R=*^I2s!1ySuxh9cC!n z79bTGmA9Efsg1+GP3K5%K}?~32opSZlk4t%1AT@6R$F$)kLSy34z}B4r>5kW^Mpcv z-JdS68O`i|9evcx>Ds$Rhq?S~+et7}rQ2z}ZQ?-e5ng^|&3{O?TIcGTgMdl%G64(8 zjC?HZC>JW8bv&=05TBeXAmo|42)Nu)yDn=ZY-yz_)~Q0q<8?U=7eT~oBei0t6}fNf ztMaZlV&MH}9k#(IwqwY!zRKgX%i*>GMLmP*nB&%n=lThv40SRM`J!Npt3O2h$0w-g zrmbZCa0wdv(jMx^Mu#bUq^{7);cfpa)lVg8T3*w4W{Q%Pt$9}w3@m#uEk%-g56MpY`GbZXG6`clP z42X96Ko8JQgl%B9nyWQ1Al4_pWi}j+B)!|V9zaPE19x9-?HSMVy<1Im`F-vP4-MrU zuQE#Y19!tYpbpnHoD9zo#J=$XA1wj_Fe#9cK*S3{9s^J}@bU)YXhkL?dyf=j(+Ljo|}vD433j?xs?sB9dVER_qR*2hPmBcvo<;cQRLaW{20{D zZwR@JxI#(cZXJC;*uhS_)iT+<3AF5QhtPIWId4$%1nuFLU zBuwv-V0@9D&btpVdGkyC>$IN`STH?APmc)xeO6E|6Tv9W)VdK!#=e7JfJuk8##m|_ z%pN&FgF(RUBu--Y^KxO^#t=>p)!7J`5;KzNG`6*|xiv3%3IWPcNdYf=oG8&pos4#A zGm_}$=^9~V^R>Zj`sxr^wKjdv?u8SJm`{%fInTiUuWe$<=A1x?y^Xh3#Ki*fDc z)|zqpfj=DpRzYKLkxWK58FX5!luX9N{>8cpv%~uLw%!FCSY$%CZ15vq5&a9I;``jL z7a0N~A}w0b1~7B$SE%A8r=+M|PGo)C0svTeb5e@h+f@>6()E>15GDpjAO0ctR4^{XEJ0`_Z=bmV?dhfwR9cFaZ93rIXXgEIod1kA}2^F||`~V#sy<(rrZ(vGg!W zq78G=R;$WqZzPGE$zri;!xuv9^SW{icg0K2&!59$wOFN#ywPB2ioL4$0j&M~1;Yl% z{IgJqQ|{wX(DiHAO{)-`4lyG%7jvf(LGslP#!!~+nCp|3hLa9Jl||k>lzbE(LMJ`@ z*({pR^(jT8ggCD6q(9!`4?MKmMj8Smwt$0@4M-8%F~^>OoA7BP2oa)qa41m$v_)Kr z4QV5LJG*LB8EUWaNimvnt2pIsIEo3mn!dlYZo^5GByZGIz;9PP+5cUzk5R5NDStI1 zFceoWfpD_+l?dMl9lyD6w}r;Bz`&MD6{teHdpqSn+HhYc)hO6=7D9Mr>0 zlW*E;Vtm}1CMo+Ye)fQ+KDi1Qs-Na-jTsZ5wuGcZUqJ<`8tM+)^snox;Vl9c4jOg) zlSZz`OGisU6ZTLkSMNR<=w`ECVST(DBvQ))py&XAm6Qn9p>C-IIalFo=f<}A$MxBL z9*7J&+S1u9w9L>6vVQDsg}gHbd2s0G=NPIPzD z-$XAq{~bAmN7h7Ol2Y{RD8RblYEqx6-5DoV{X%L%-`ab2pJM`1<|P7~k+l7o_xZH0 zGZt%1#ilUBzCOu_1O8+|cs5_Txe7k6nHGRQUtwQI~U$SuQK+T=t-%7)eMnF;G z)YR0NQ`<3=N;ANGu7+^>oPDyP4wmzM#Khx&oeL7INcJ34l!zpZB<&zWZrqIQr(^`+G0<9PC|Ff*DpxQ#{B?iPbnk4 zJe)v9l*+c2;l5X>cGvTTW(F(1k5Y!IMX(4-(eXq+T09}e=l9SYtc(g)J!TNAeKH-% z7~+tE9gJX}_tr08xy0fBT!df{4)^Uq6rRkGMU#P~Hba z=lkpVsSD(vtX%5FG>UhF+;^U&T|lyWtrkk97(IpRK3?s$I9|C#Uc^q#&Dj&G81pTi zl>TTsv(?wvU#(?{6wmbyfeZb4_;5AKIN9lOYe7zQilybik#?#iQtky8hr5$I@< z@nTu%L4V5+grLJlYN9GYQ6(M^5M%p!1!+}Rlsv@lYrRY2aD>j#_rgy% z#J5!uo?f^c$xebWm6_jqs1118cVXBwM0{SH(O|>bwmWuahuxtsQpt4g zK^~huZpmGd`sSe!VJWA$FYW5zzX1)3#baVWFaJ+8{x^@T?)u2jJLSn=~ zW80>iR$7QoT);z*`a@4O(%-3nnO{q{2pdBOPCAs4sU^{~+<`k9U1Ih|yj>B}c{zah zBW?q{;Oo^>%QB->4m;aaU*Ii7(a_HOtJjGbWS_UEjOnE-K+>Jrw`aLuzKmEnGleUY z&GSf$Pc)c6h=H6JhwDw6$jR3zNuwQvR-B}+*oi86oEk0ePwU zH0(!XZ&?yx1f6F<-(qBapPoKHPo+&^{nBT#TsyDfjh+z}8oJ3N zbQvL^YXL>g@Cl0i|kbO#1LG09hks>r{{ZusG{hO+r zyjQg@v&YTh*Pl-}%R(8!Z%E@mAoVX1g-GzV&snsXWHh^u93^%Mfd!1WJTj9F8GY1W zWLzm217ruCQ#R3!0Pi*a?sd-SGM=KY`2!Ui*qRJ4_p@&PUt!~$quFQ2vj`$|e0;A* zfGWzF0eq4D=Teui)#i79aPjoZ_0o`TP4j^-XT^5TL+iHl zbdL3nkH_P*ziv@LWioyTAaJ9j?x#?0XA3IVWAHoPe#UHEPcX;1ewq}mA5fYijNwZ+ z;Px!ch?2O1LXuiXZ7o>LC=KPgd9h1B9``sV#Z$=pjmGDT_e)S$$|)0@66hYQe_)p+ zQH%X)$Vbp(wFrp&%i8*_-Vobejv74Ef)rgp9P>Y}=j5| zi;F*2b8R6n3yex@Z@`V1hXH|oVc}1U3i@Vg^>dZ-MJs-g6G(P?5`TC5J)Q95nD1%n ziw!aF0exLjtz=>Cvki0aZ5PnT=hLjbu~F5d{K%IbT->hS?717W zczD>Jf~7z@!vmwftBrGpflRzqeg+G-7qJ_i=0*kUKbxl0`fuaI*;?y4xxPxvcRaT) z@UPxQtT0q8@~`e>p;)3ko71g)KcB2N13t$e9v=4Iph}I6oFYCR&=pK5^draAgtWSz zr|(}FaDG|wFH}UGL=p6%7$6P)9bi1nPoa-c-9XN0eT9-{O6}_=989A^$k39H8_IL1 zz7|lyTG4WkfsF}mf*6M@Qlnpw8PMHzoRR@hgVB1;3M)VPr!2KEC$3DV-@zIn=^xifZp+hF=TO&9zz_!Mw^oBkObZo9Z2!?r2?+DkK5DD5rdX5&ZoB#ktjpJu&MLBKg${d3?)GB zK|5ed;5i?;7N|oE@q~Qm`d4y%FE5IQwfY9Ky+cym9xk>@I5xblcgoAl7n{GsOGf5( znr3ocxFJ(gca0|i1`gqFz~}T3V5D7Tkx3_*jnEnHsz%mq(%MUd6yzO6KcYY|)ipIW z=UUG@zL7#83J$V;zW1Ow`3ocfZoc?1^V5b=>t=g??NC%%VKFkSwhmAph=1xgfw}^T zyK?eeAlXB>T~1R?fIh`ou3lq++bIEG7^zQe1isJZXaSJGRMmHM#B36HQ%(kIl0+Te5K(L;in$y0&ur*_8|#dnz0LlA9uB#^;8}fa~km zuP$$G?vpt@&c`Bww4o4cgxxZ%C$tYdLh<5-I-G7#TMTr!f_d{d{DZbL=YCiuN|nmnOhBa^;9uE&C#n7DToERlX$%EqXM7j%30o zk9yni`-C8m?l1L5L2J0HR7>~-@*!>ypd_Z zXD-*{#>Vkx0$V~>Naq4$4kHQq4-i-oW(SOqUmlbsY%)!2NUYClkV&HRt1@w|*`78N z8OiFQ_WQFE&IAd{^KLs~kXF&O8S|g^sax;?jv^U2bU?mTIpL2jF|^9p=i9kqTra!t zj$UFgIt2}3#7}~I+-3@D71Sjw{J{=H@CS# zB0(SdCoUvyY(`hh7CDh0brkeVUCko)5O_4a<-L!nRNUM+?@8S}sp;r$JQi`0Vp;a6 zD<^!?t3ql2__0>7;$(2xoJBiR3_1^lJ0wp;6=Ou42X!N-)Jp<&f_qrz%!w zLoMKs&&k=ixFPCPqoD9d{)(rIqq`3mjVlyvii#)f7RVHMeK4u}0$`)l%>XWO9a@ke znK-OMdIH$-RlK@Zum#aWBo{44>JJgsXDSEP!h%IS48VTG;~?dodH{kNu!N%{(TOKf z6dZB|BOfqDQYQ_<;#Ned!&~gs1W!Kl$z0>m7?Y6XxpZ=9sFm-m7?t=Qy7;P*GT5(2 zb9NXkhjZ%o|C%~4YHQ|aP^2TNpNQC+$%1<}pDmJkT4}H>uD4otAn5!9C1P(6!Q&^{ z+zm?@=gWFHVtTXe%AL2cB&VdbXciP7Nthe$05=N!E;>TTHc;p{ZicmxCfP-=zM*=A@S3d-&J1Axkq#RzNKtT*D{yv6)zpZsu~QQ#d+@ zY0FwM^z}(()Aek%l*4c!Qb*lHrUe!rL6M7D;j>y(U{cZ+uGtELNnU*a;9#-${hF)c z@my)b2()HyA4?!=HC5fF7~I|oF)AV=qJ$>c2v$)kKO_p=n5?^(BAvl*J>&No_SYfi z{NFrRoGc2dSf?7#o{b+(OLb?Bq=cn5G#1We?l1x~qY{3Q~BO;Wa zT9erD@Q_=4aH#&+#7JdEW;$~w;spzUTkpC7;*KjH6YCy~{7Zhx7IePe{$mEZ-wv6& z7s#S*Gy-j}14ug59?f?mI9(ZhBt$j&nrH3eSc`edGS(cNquh5 zhubM4KL}ush9vjN?PH*!SuNHL198>emHAaT{k875R|3t+@p)H$Z>cZ?hD^axB2Ctw zZv#~wPsfb_N4gjNJSERq918IO01I34SV|er}eQmNvpj`U((TFeyZURGIDd zDE`fe)foak%)YE7hUNWE1#zDJ-th2nX`9|wPm5Rer2l=J6A1{5k z<@fn=`jxqyBVsI;TA-?D%=>^H<{VijhwXZqSL_;fsLEO(Y|16Hj9}>-G4JJVNuZjn zeRG`FYl(3&v`jPqmiLymz&g^CB1szuIG|=Ql8|K=kgVE|^L)KP7Q@0h^gOW^7|{ra zOxt=Fv1f8l&;>PE;+aJ|Mll7L^R+}eMvb}Aa>)+#BC3htMq%>B3-A-x;x$Q9coz$$ z96Hr>We!O7VL5D9OH`#tzUb-XU4Kt2bOu}nEFAhDjXWnMV&ZZ6JC{o}hE>v;oR|MH zIBbKrtQB6k(YL*JqtZ|;c7NsnEy6T!pm`B3Rn7zL6iJu1+KY&Y&?ur}-f$+L>1iIb zHYsNjytac2)IyL5X0YfrSx>^Kva2~`XYn|55@z3`1ghjDmUGa)>?mR-&qzfoQ?<~m zOZs4VwL9 z6A)+TR04bKasIbTt4gUszdzPE+55G6`=BhZ@?+kHcv@pC`p+F#JgSbBrgLSw6k||- z!j}i`#$?!Id=11%VqdYPKh7sCET$9qL(rTbC%}rKc^?1CbCt&`e3~T52swxGU4&T0 zJUW%{7?Q?uih0zeIhxR4d3JZ{kVM2KByhL^L)O24@EjNs@>`>ZL3GaH)yeKdUU!y7YOs@^i5895q;{T8IiCDPVx ziAk&aB2#j4aZxA#cyVQ#3mDM&d4m1{jIhwqYe06Tk}VjeufLxja1k=)I;wDjQP!=1 zXxA<aIgh%&_iP842uN%w^Xxpwx>()1KAmhDl7_@31j7}?mkn_g(*1VM(3Lr9&xy+D@O&(O9fr6yL>J!p=V6a5 zqAp@f)k4h#-k}Qq=N-TG1R*|Om1TujzqBR+S!jU<888;&=6NR!}xg4eftzV8FEg^q<5 z$AbpI&DQpQeR4kAldSy2G9jJHr0)z^cnd%Xt=M)NPmzfjjAwo{QN!qaU8+#_M6b&f zNG11nH6zDQ!4^yc1w-_ceIw)hC^0)byYubwyponVoZldi&=m0A*M9gsY;nDnN!i`5 zaoaz^cZ!LMnsC^xJ}3giG!pvfM|z07Yzw^If1C{f4Vu6 znHgu^E%~y>QooBsHW>%gL&6Fe?HMAPar_P+8T#@EVwDdy^2MTX2YsC3&)9<%yM%hR znH-E;@fJH?OaIGj!T=EqVNmKkBi@Wf1DnhFGK=2ucK_tT!9j?Tv9ZW6ci9+QXFiV_ z2$Y309G4nT3rR{)aPY{w+s>^F@IE}2>lZvV+CBQ~IW{~j{j(pcO2!;kP%UxeOxXNv zK>ASj`eG+V|3ONLQBy;pS|(qVX@`qNZ5_=PxANTXCVYFWV(3;HKncP<(SzG+A#o(( z-6&fx*Qm1!kgbw0NrWjBG&h?q8c%CpTH2B0Fn1{Tdc3gYI&j3T(5d_5xiEeD2PR84 zj|B*(MH58yY3qkvI2eW=`#S;NSb|Eqyh8Hp_4ESJ>vpnZuT+8dyGRJ^1RutK@+S4s z>Oy0QvSde6(9%FFKisW)S{M~{#uDTfp4F{?LO|d>#szf!O#{0KW>BMsd zJafww$`lEfE~iS1x)u0Myxi{T+G2g7{eV}fsB*RIAFuV*`9;rMGxfBRmDDg)!m98h z5Wg&M9b;(ruAI$!`04F@aYfgH970QDKSo) z)#i$9rogU>24^6bqQv+9a?MA*$%JvUNB~$s7^lL!qKHtx-bbYJ>oIf#}s7QbpY+I$%-m$6HLeMn7^j~lV zSdz|kl~+s{&6c}n=n?t^;-)mZpY2681Pc8Gq@|_3Dgp6sDVNDPow3nJRxYJ}zrO13 zs3Wv0pVs{uCQcufH3pLV2OuPa05-YiB05D~H2@uqrLp+@cYpy2`Q|3-J4+!4QKn zz^CJ7mKZqVmvdcizHBwik1il${SPYs|*m9{|z4mtm)MBHI zuMNCM#~`}?>{hj0+GG(CvEt6F5n8cj4oetPq2u=IGO*zfB7V zt7f?T_ncD7uNErbZyL@V@X_v0SLTf#zLBxVt1vh_sg-L%=vm0>2$Su-91U4<_64pv zD}ROrthG9S+ovZa>8IEM0V;s+6@XC@Jr_89Lv+dO6qc6u=(-y8^GIS5b@61_Z!wFiO)?Jyit55&PKexFW3LWlHSdR^495w^XI9S;T&qElt@v@@wW zXc;F9C`dj+BqXqP#3?XP-l<)E``?F&k3Dathu)>Jw!7G?E^hwtaz_scOo7dor%}Nu zoNN?;hyH#9NN!J{XEG+6lv9}Xvr+~7kR=@mCPA~2GBGfPruJ9?l3|?bBV1FDMC?w9 zTsDse_F%54gxQc8k8g_|Wc!~Tl3>0V1$*|Y8K}cFnr|VjL&}ydT%1LV)eaS??Q+Q+ zt8DBcv+2WSfL-!5&aqKH$SLSGsPY9TD3T5l$jQ=&-XD}?*uv{tQGP=g%NULxh{lCS zxZ-DEINPkt%K3KL-Fz>0)s5sQusaAkOpmw}AeGWwTsMaRmrSTvHr1)2&lJ2?%gL zfPSwy-}Kz5|9s{Y~edr>>3!hUzpQdBBIwbRwmyG!)(cx|n0c z>`$tc>5pr*Q>)N>#GMeTU^88sMj~F1n?K8SrkxVW?N3ogNQ*|RfWqk0HdON@0R?09 zAcPyck(wWc5-91#W>aN3Vgjo%-S~{dw%*0}z9sPj96p26{JdkyR?Q3Vn{D??3-FW_ zKz>C$PdPPMYB|kj;=34+;L;~f(Y03B4YABZNOyO)+T*-VRQrJZ<+|s2g^aAM4O9#Y z6>=;JD7c~d7lEJT(AwTS>Ukg-HgU3Ve4{Co$N0*@8@-d9r|m^Ma_gG)2FVTR`bZ4g zf1DJ95HP&~i=USrPe^)X1;`e@0;tM|o}NA`!|9`ZfEn#-L6`B60+X($hCeN1Vujwm zg&IQAU)1IB{38+=2oqJfa)(k_4TZO8yWcfh3_cfHgX>1e!2gF3Hx&4Oy%2cE^=fJU z4R_<;ejo1eh4u)!#kcriFE`>?KUUBk=j)}ZUFE8M%&Wy80(-B-v96IB|Gy0xYr zQyX|fNHw+SgOqwJ!&C47CIDZ8f%RD#6YgP`o%>(#4j2P?#5JQM9*A>#EDW0+7b{bk zqf@h4k_|z5Gpy0E-4`y+`>gA)f^T`22b+GMzW{h@K2-M~HZ=x<-12v%0*HGWgU%-Z^8Kelcg5TqwpO?e(9^rS0wt zp5c-aJcjh+fNTSCKYR#)6I!a)?;Qo`foBq;5hrvc#mjz%t!OOk-BV4Vv3>$N(^Z!1 zx@VC_U8X9%rU|vOq(*G|Ee>-VHu!s_%O($LZDh&=pjvCGI@7nH3=9lr0HyF`51j2o zBHFE6S|!!2HM<;D`K?ONgmRyHjRHMY#uTviRquva6U+V@3iKkd zJ}zb{fSX^rGjaJ_rL1VK;s-~X?%wrM5+Auhc%#+&Do2OmpbI; zHS75T?qEElX(@-Mf=VEL`*i@FN_O?3%IDRRi0Bl1>1U06+5BRo&Du&xHKH24XSS3H zQzm>QF~bB?pt{X}kTitI>p{rKXx9VSMnN9Il`i73MOB?|9> zY5<~oa57)!TH$`Zk8{WRqy0-}fV`jJFju~%AhGU8`w)_@9anYHLINheu5LnJ&#_$& zc+Si$pF=Wo>}1E+S0_zYQzx>ag>sGAc0f^b-TW4&#R~Gq4tA&Ac11wkVZijc^pG3X z9D7A-moCbtM5hQ=sLyI5rgLNpG#ltf=>TE^r2(+WcOyr8+TFluoSp}f1k*9Rp7@s? z3K%Z>M6lb+%_kxJY{xeqI*H?#e+Gao_zbAX^uWk2L_Oao)!bU9*ayLVgqTi6MSWqP zb)*q&YuufG5nw6Kvv&I*fZsG8^C^7r zHjs4-1vnb3be`~+UsZnF=$w=n0)YpUYp9gbDO3l8w1{~meaxV(p*S*EScBPAw>~Xx ztPVsGSt?ZO&mN08lAxJJKni+_uX&?8XCmK8WtxBlgoXQ#0Jag+{at#@TKt^ZlWb`B zyU1aMlzZ1c4NRA9Q0G1QkdQvn7C@1j?oSra-yh~bLYLaEwkl@6G8_Pcw4etiI51Fa zJKzPY_(vJAd`zmeTkp_U^+olA1PYP{@o3^Rl?j6+`&C@Xco2S7U)Y@~g}kA*eT;e# z6%ERKrW$Cw9&J)iU}D~>gPx6^Wg`2vY!+zX@AI$y*!N#C z1a@61C#6zdvqz-d{5csSiGThe01os)3>)u{W)B>_>wI^BQ@rJ2tO98}`$yz^*o3^m z-hjHTjhp!I#45ABjVP2q$!>1Y$}%^QHu*4P#1|#b{QHUl~Y18i)r{$-^evX`fRSJau`Q)AD-N?3sDUSQP}La z`!Kib<41@RZS(q0%-7LkF1mmuR)#pisBU2aL)3=;WdP64x%#x6~B1ArIzu535hUmPDD2o$#@ zE8_)Exw602;PcOVFldtm=oFdH)8t!gtup>#w?Oc$nsSi^>m*vRD- z*@vU)@2F_Lz>8hIdi4~k_y(fhefQl2XXFR!>hHh*-c{-t{w~QtXY2d#zaNjni^N8T z(E`m?FbM-Fl(6DK;`QRplv2sD1eLjO-@e=L+__UnD!xRDHEY&PK@%EHXKGBa52^io zlW1JUaVB11Njkt7!Uzr-GHmn3OP8V~&R;CW>#x851NK9_dv1S77t_8<=MRmCvqa;M z<6-|eKFZR@jTN^<3ck||7e5$o}ZZmp!Gq~TKg4+KdDxM>)#I2JWfu1pWoR4>^tj*Wace*E}L zQgIAq!AnN^h|`Zs${0fU4Fp3N=zXt}fvytXvrL=JxWaQ4OJv-{5>_-wyk08fg~D<0 zI~)t2sL=QB-K!xLzp{kiBqSt!DvS#xbxco6Yz(Of2i?AXyPnj!S*tT=&cwr8#=hwB z#OWcHFmCopxDQlV-0YHyTcGRk;lr;B^F=HvLkQtFxCw=_Ew$f3l4PJO_1R~iErmfu zMMXg`F=`|#!cQt;#S(5$^+2YRZG_)(9Ar3*;#+UMm8jKft4hVo8r`^Yqp>(X!ee=Z zgoY487`>B!_wLjVYD2E_$Rw@_hs+{TfnW@U5*xp= zDjUAj4CgW*L#s&h5EW_cxN&O@27^i}KGy2erAv5WeyL-N<0qDIdnpOyPtT#O^_HYm zqL1^wf`dZHesDpM8!uhDKWxSkrKBDhtci-^x(&n<$6qX|I8#wW2+?h^&-nQG6gGUu zvZha;z8Go7buExOpd=S7l>d_Yz~>PSL%sOsn{TE`?JE!^8R!npp7Z5=WQMSUa2X|% zCF4SWONC5U1>bKP;uv4fnKNH14z|h{jbXWvaz>S0#ihlC3m2lL&dVNsvts2mjmBgU zO~*AEFiR50S16$|#D4Kf$AL=4fvL3M9nyZ?kg!m$E>KbDfLkW(V`5_ueY=WzghELM zx=WvZ{`pcED_WtrPmwGc7jcE6S3q3-IzRvX{)F|;>kAPYLwY=M{KOK*RZ7CRLzdQC zlGqr+GgQi)>lspgIcM&CIUk@87$3?>kb*W|G#aQ>S|L>CQs?D}mM&d7qhrU8 zDff?KN+0?umUNsS>?4FBWFDb?Lf`iPO`ZA`>HwD~e0NbyT->13s~g@OU0&rOOAi-jc+|P#qLP!f^vdVZ5i+R+Bm}XX%G6KX8v; z(hCzjJ$za^ALB}o}W`Ydu$QBh^7^Kzy#Gc#WkdWdKO z3(tp!i_Z-np{w(ciK#3}26|c_5QB&cC9FKOUx|vYgYbA=ZfIHa4ryM} zWyB3nO)x{q+%3sKZ<5#`qO%>92(!sZZSQKKVhfG3 z*jTw*QU$ojR%9N;mV;OE2x*ym@mMEEB^F50(pe7M$rKn0q4~F^K$34h&96Na&>1YSkZp_~FW@o_gv>eieh^aeTrU0)xR2l9!iPpBTeae2_;N zJb3Wt<;$1%#xn76xGRb?@n>VmV0beM{?qFrhDYmZLkJ#z_~8P4!YB#frNUq^d>fxJ zhTcNCa^<)eY5R&tP0?sH8T=xH!H|F%LmR1Gr_NQGprs0 zxm;yxE6cIWn#c@>IhZlDk!sbdzB<*s>RKAEH=5u2jm@!NJZQ5XoTp z8O#{kh)Si(uUV_+E&i+50|RZ;8)Wq!t7r0P84QLb%oy6iJMWDAA*Nky8vj)UlGOTS z#o%fxG3Ewd+XB2pjiyybTx{$KK8V3!NT8fNc{0Dqu$H=y*8Ta?r3)>C)xr5^&YXFS z59Xm3O!_8^*uG)Z3xyj2!!J`qNWTlWE1UZpMn$zwf2vWVKQ&RU|9WrC=+S&IgTZ(J bne6`ok+|m^eJ{$T00000NkvXXu0mjfFPUJ@ diff --git a/core/gui/src/assets/operator_images/StateToData.png b/core/gui/src/assets/operator_images/StateToData.png deleted file mode 100644 index 6fb07c4c71e807ac414135805d0097b17a3399b0..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 24997 zcmX6^WmH?s7RDvGySqby;_gt~iv*`Yix-#TUZA+UI~2Df!JQU&cXxf~uJ;2;RS9q|OyPmY$d2;*E>KYDegFMH4?2`uKtY)bDo9Icc^IE&A^PCTJP&fXd($P+ z%RpcPbR zby4k+Q@+0-MhL46+AEg0cz9`byPIq(CL~#dr_<&YdDp?{4K6hh>7Ck06f6I+n&LITG9_3z*N3C0j#KZ)+ zmdst6B`TKagq#~$+k>$sdwY8yLo$rd*-|<#k{Mo%Gd1pU@xXy69l>zt0pgJyalI)N z`)|%W-LMiPnkzV)DJh#ZT|00b;TYtIbR*!VB-rj2Bzl^}ya`Gq!SY&H<&2NR#vP8+ z+q%Y1yZ(?_ULJNrT)g#ySkX3%@pN7yL=5t;w1P?biDno7#9RXfA(-;m`&hEIuYEh6zPj?lSoPS8rqnWxMUL6%wMQ7!+;e&d*OALN{ zMA{1P6wmX8rStfx?zSj~L8EInvv01b5{=PM^7HaKCnDNOOA zsc`D)X+xAQ;9{^=m3@D6S6*y)uPJ`vVWxWu3=Hh&G*A$&El%$j`yICn^WaGb!xsS; zc>gVgc{6{f1bq3R7$|)~0(KC8sQcvtIqr-Bmr0}7yia+Xy)(U%y&XC}ZS({&Q1CnE zQV5misYgr+Wui-r-Kz(&h{Y@flxZr6auT8n`qW8{ua%`=&6a*Whyq28L3-|T?WD=> z282fN5^zx+i1{5i{M}BMc=cRoroz2%+}fou?|FNJS(xY|_zc+z=G>4c3$Y4(2cK>{IBvJX|3+av|yI()J&S7A>I7tf`KL9df80CKapplKh#_=wFte;F>Dx~^FZ5Uh?Ef`*yy1m!|-Bs`XjsI^q3TSQyRL8ykU}I`|k!emlF1B zQtyBzVM0=+iR;bw;*G5buBUx5*;C|Rpc82wz9wFxbZb z)2;D)eM&|^BmPRjF+CNogf07%wAFd8Qoplx1{ohW)cd^CeLR)RJQ*Jkk1ipE5*Htz z9uF6n)||MoJNDIR^|J+cj(|5k#J&Ofi^g)MN{?wX$Sq9DQSWqYW~J4|n&cs*MyB>Q zfC;X~1Mla3mxf16s}QgXiQJ5>1KZx;7(HI+6LLEIH(p+EAZi>??xJEOQ5vc+6JZ+c z5>K+>*~=Afou>DpIVfavC_8&X%P9V5^&3Fk1b zU;4?A3uI%SPCD<`jt%lj1L5GmUF{50sHT0eEIlYBZYvo+bz|@nCdXmw!d(ItisKmB zr1(REx{msulVqBcl^FZpFJy=9bVW5t*3Ccy~RqN4HGs)}zp^ zu$exAw?ik{vvfcjJ6h&0xcX$4qU7MR1kZP;Kqk@Yc#)CNz{khk;3v8$)@}*{4r5^%rs(u9*vGx70U8Mw2+-`Jo>vx; z4-FrnQKFT`HZNP?X?VXYWF^US-esb!A7G!f8wDD;(rxe|Q;0E!`Oe7Bkyj7mJ9vZr zUvJm!=Bvp#vh(w>_zsw)yw|Bq;UZOOo^bJTbzItgt%g&Yz3{k*Q3VT3ES}X23)g(! zV5n3b9_p+ov&ZVp2Ssb0cSj=f8$x9bbDNIk3Id&4R|7L}lNg{^qxb1BYZoff+b6!7 zDWxRB*eWY)%Z^(&h3ww<A#b|fJo>6K((m%>@Y5x_`0tsbS)t!-ZI5De{NEnC4Ss)#4_dd>_W-VsC%x<7`uV7$ z<^#FMMnKXQ74UuD3w!%XeGJfye9Jwr_e?ohC&F4cW~CUrSKHme z0v1DYy#F<}RO+u$F^M?}EC0e_*qcX7k8UfF{88*0mVIX`B!Uns=BtgKYmRHqYhf3m zN21W&^AwetN&O6wGY7#Q7S`6*CrgdTpz*8OaG72WAE23z=I+_&KX9{Cj?xZqI{u}F zcjRi)QnTYXVxK$5!)RG0r}{H*6L}FZ`Unq?-$auKRAd9O_m>9$?`N*FI*h!2_WT<- ze3j!JF8dQKgFnfh%ML2@uyAOjN%+nZ(zvbHnBLHd|DM%SgQ6nY>r%(%WcE}4s3@NL zzdwUB*8QGO%YhwG`2O~M&a0X&oEABZQ>jyNG48(J)OQt`s*ljnVd%a3eOcsz{G-64 zlftUfq@m&dR)bNS>*5+U_jN|OcEuS%XAH6*zelY@Lb0-_xh6!3aTK)eIQs1&91A>_ z&07(Ba3UokVH!j((#G*}x9p%))kA#`wuK+r+}|QS*;$Pd^m z7}LK}wyKp(sXzaS0sH|v|0bQTz`9gb$yyS4A{S5cxioJ z{pZm6tJ|>Dxdm?B#M;8*BxxYw#c#$u0PfylIv=tIoDRPNS5;-6h^7O2QBkM(8M&jk zU;u=tSY}F@)y<%-C{yvj3<+PM)J{keow4)w- zKwJ6D6hie`68{9sbAjsWW}2h}gb2fz8RT)W)^LoUGMZGp)2tPWPSyk(i3Kxs61Z!! zq%Pz-tap(los~q;PbbFVQos@Ea}Y+pi+cUe`ZYL>Su(4ShHt#w^v&HF9?~nbnLse{ zAJF`R!T5K17{*WvpOPsSw}>;TS^ScU_3C1HHjG7-O2Q=2*HE`8v}obbRZI8Efc<`j z`-}4R-nbI0*x_r7L2ck~3Buf$A?o?W9C|$Flig4su5%L!27aa=AkT_+^TD4~S%Tl6 zx(6xzUwXbRH#PP`88Pdf5aNBhbBqrLv3r}hnn5Bb3SW%LUN2HTyI-!9FfJX(g7~4? zQ%DZ+wbI%2IBO7jz3}M(J~-Eqq9Mo;p=9UuMXM`qAC!@+}$} zgWbzP3IiVJoguj?WQ>sDI`hB%c#{`?MQY?5qKq9n7215>cZj|eJ_mUWbMN=v7*S#7 z#Kue>!i5i!Q9;JmA*@HFBh;GodU#H+p&nxJd-zWZ43u8Oa)~O4of2;V1jZ=RLuI&JD-al zM2?}nrIO$!A_p{Oo zSBY_KqJKAjR92K$&fNLGH}A#2x!?P{t#_@UPM7rtG`6~*uZt9DW(-5xZG&hobfs}6^>2CB5Lb=#2Ght$ogJEdeqShXge!z%im_{ zmD6uLTSj8jN57&}5c&G0&c-e{G%wrL!4ebihOh0q9di}bWZhcUQJk& zn*wK;L^tynfy$bHQknx2O-T-t)B|=1V_)$M;QN1AP)zM4a4PZ`kIq& z@6rC!bFygU?Cd;+q01iEcr5M_B&+3BnSVu3#9X0~Q&qNk)_(4{*Seo=u+sMZ_-iso zFVy8g4kD-)XaVbL+2y^YJUk;27p$UzAM8B@z|bk9`lr)!6rP^j1Init%U@7!^y5ll z=`LRH*PA^rAp~JdUz?O<_QNH0tRNZf(&uZPU{Wr#{()lUjL@%-m%_BW`#JuhSIfmr zVlh{=!Vo!pJjYJ4cA1h!Ow6h9&c`L?&i&FfxXf^EYn_Q zC^f}Rd`STcU!MCAj&8$Q)&NO_cXi6z-SIpzu|%puP%y2tx`iu5Ka0 z5x)2X`+f!#+G@xB8!6GPOGI>1{;5*!3Z3`Y)1i3w|9W=+6bUMVDx)HN8gAOZ&F~e2 zRe%VC@Aons+yg+2EIUV7Vn39ps=cU$Zd*OOAz;A{K5>t3lz6ua~jE0F{ z2?>i_nPKR6juK)NPj8sUdTa*G_TO#?NF1VZG325*+VLGfA8JN*8u?-EMlfU(2IR57 zv5AqB?>a&klBj|UkrUU+UY|PK9M(nuH6Yr2d5{u7L_o3pD}R;4l)!K{yqLiBW^r+8 zZ0i)^;Fot{==R}PDW@rOxl^ogy~*`}vQ*Qz<+&8xSr)hUEM8R_Xizn7+n_dsN+u&O zL{sKZ9|D=`h``osFtE$LUx0-&B8{-(*igj8Nndkc_f1IQ0E_AaENVJ+&2WcO3$wluWqVu=d(P~m>i#4Zm=Q?5nK|a-3Q)Vs zfP_itX6hL+vNMi3N@Get+RBZuGhuX4q$0)x=T$ ztLTAGvB~sv%)qF=>*;L0n*&yvOM^7u?#o8c0;RQh)oQ@~+A}2d-M-~K8yXhed5|Ba z$oh*}yZl9dt{(;1gNNIvOU>^(u0N4pEC+!jlfJXN%Zr`GLK+lso0cpUG_Pc3wU?j7 zqAQ9&!7m1)=sLjk_iU#Dj^GEztk3SJOW!O011KN7#uQo4!h-)pd z=4n3mp@=1R6m`I6Q0ocZa%stI%K(tZ%T^ReC+~IRd`@eZQF|<$1plDQFL51xiS;hu z64i7b8~sTG2P1~K-au7nbrk=fM(u7^%KneL3i&PzaXGB~GfPnsgStDl{NKHs15H>l zvqnQ$YC4=(`3#y+4`!;Cn*~Ld<`B+bwSTZ9p`S-S9G2wFygwb+ls}9Fv4T&F!eob} z`#+3bxQlsC$OqVd>owuy!$4;JcX`C?Uld=|HH=%eF5ds&|WL;#uM?Es5$?Rm_*UCm~SQzj!DeOjTo|_>Adu{;LDLBBzMnZh2 zmQbCWI1^daKks-xE7;nsrBq@pw8`}FY8~>*AE9HGGMmxR^nck&fCPE1E@?(jUXO4X z-kq&xT<#>OqI=!`)737j3q>e(jB*(MMfck5u)YRLCI>O3h!0Il_7r;p1;K?^^ek1W6v1$suL=@Sj_- z_))u)yE#5H_hlvYlcPgJV?_&05jiF^t*mNwCV@e%F8gjiPlqL9_*ooZ7*)nR_a};S z_&w_Z$ zTDnDi^HTb3wq$atKrXg7U!y~{RB}#xDIAL?xzTR^Ovu6XU!67I%{#si27YG4a4(IUH0yhfdMm%LITW*Il{01K#3Ii z9J+1RmPxZEs(jzp^ZF)am5JYf@c-HQGi2xU%S3)OneE;TTfgX-!(v)9Sp7u{72sB$ z6gU6E#k2$Xd<&tzAH;g^XSo;u_cg*i7|k}=E-LjK&N{Y#3c0zStvr&ngb3ympA1iQ zNv@y~wfW1FLI3c@6vs>cQM#{<^+7~PKw(te`U^Joc zM>m}ZT4j&tp=jnyP2Zjm=c?8l1wA$pKN^BOaRT|a!VOT%mKv;85t3rhfX+cS!T#4o z>kGwtPY@ypjyo*%{DS;~T}`Lkaeo930oZw%Zdb6$PMFK%Vq;v*FRwx*t2(!I%JpV} z>*MyPQndke=5fU>MbfojKXmtkjUABov8a2iI=xih=cV31&$n4te4UOcvgS|XtcENb zvy!TUOx+9D@&~E?-)`s83Mjk|^1!h#$!AWc(ZA-&2A~qjP+%(CU*#LX1pkp;gl)Z%SzmU3=n2TZyhI$vXKrhW?Z6C`zW;Um=B zW5^S^3!|rdy(W*wdb_Pb9K;~xH1RGNm{0WC4bZRBZxH@;b#;ZE!ROGW`l4@>3UE{< zKqBchekqIOwVPYvv|njyhU5_NBo5x-&D=1tvUXU^7AuqG-2|%pI9IoywN(KcSmiof zy70vY>_d}2uZJvS!7JijNYR6&GDkE6-1j3xjickF$p2un@KjDBg{YI%J8tEn+Eozg z*k1H0Afe84hAH;eTaHR=j94BDotB4t7{pvx<~R=VdFX_d@Ey9|66;#8_$3XKK3k zRGVdRG+v1s`-^;o5E4Db^YdMmW^w%l+7h+Ly~J5b(}mu;;SclP)B7Jl1kx@U~jx2c7KdY53ww(@&fk-qXP z=)IOJ;u}+@Qs*5X{Vm6FQ8r>y^8>i-!Lnm<{eBfv2W`N~a&ygLGMfQstf`@tvNp*K z`6Mp;fz?u%uXlz*0)5QWY4bXJOPc+b&Sd~#QthP!&RRYsVn!03NXiI%%y3Q`oa)c?l*8zAN=)mu)r8*SZc=P$HD2g!8 z7SIE)X1h16cL44`Bk&J}-+~*7We#q-Uv*#q3kG;&Dd>d{5O~9x0wB%to_4MY_ z&vU#!--CmoQ-(?jiA!bw00dGfnq0&y@?Q6Glj$zC>-nsG0*664-RmRABnd1oLgg|` zEFQaJE@s>6=B+Y5E*`yU6i-5=u%9q8IsMucpe;PaDVJ1yS07(@^sSVbq*P0|$#1Z} zPJaXV3YpjKQRAptU7|3>Rk&C@Y*APsnq%fxSEWS+gD`t+MrZaQx5D#9p;I31kgJ9S z1>+VH$1g^QW>1f?)M0{|egxzOt8p*SouT;Hk#XyFa3YsT*T{5`z!p>Yc#hclqc~pFE_KQZTW#P zBRuq3vt$d7=4sJ#B%u7!vW%CU5;4ECG$tt--G4q!W)PfL`)=Rzhlb$#t@D@H7(w?s z&G{_9hrcYV?>O@#7;J}qJIZ3OyQ0eBf*4mM1E*GmU=6mF-C}Emc zR6Asw|G=}zL<*pE<)wB$zJ}DQ&|s#{sZVTH@Hip3V5(B+%bpq;|Mh&CsBDPj!PaI= zkwQbU*ai+S)yiJ=iPxBi@9XWnf3xFe@3cjtw8d&-DnexJM>>jGmraUhmv}!Y3{#5K zV>HM$0lUx7`4QV)pb+_%V&)YlJsk+gQAYbjDS)y8819@FVg^(^8i&J07ZE&4H;x1o zi$KTg77c82hhn&z4I--b^A#@XL(!g*7kHHHZ9fGWaxud=9VA40);|uJc9@(!a7J=$ zlIpW{zi&W4ryCI>!D$|o_mKfxL9G$pz7s(d`9cH*0(E+v`jNMR5fhgxxlotoR%0{2 z^uB(ekJpPFcdh2@SQ}=LPckq=tUDC8i1N#hf7Iz=NB`sy$(?7}OAN&T^X zoR)h4Ca{*WBb-~q>+5KTiNsB4f(mAcUxEpdO6!GYKn5%4o@}j0ykXx<+~)7)@9ef- z_?p67q``=iiV?woxmkIhP8|hZ-P*OSF;^+ST!o_S);Z^V7bLz2B(a&=)zZ=`dXSCh(PX<1CkT>=CmmE$vbL07?zA}Ku%&0`p6L)N4yt9Lo@zbdWjx1A4#{Gt8iHSp zd?@1(IK?l9HwI!zYA8f~9&j~+)gL?RsdIgb?>F+KWIEffb^%JhQr7g2a4|M6L0@8J zvyVpdk;fMhJ^JcjM4ukEnSdgylXH7K_r2IE<3IVjOwN&_{%ei&>UA0Ekb>$ZD}SbHCWw2S|?kf?uRR=q{2E@RE%a4D*Z=#}0Z!t+5i8e5@n4fx9N?CwsIrqiH z>Gz0j+Uvj=pOqTn==bjpxj*pf13=NmLjk)CcwBq5*aB);+ODFpaEU2_*?(yarvRTV z1&c;@j9FEwAs@+;TB9bZbcIz1HoYB?|1qA~uoKDuT(g3^I-UW=yMAh4E&|_G^ChB$ z-oM3TI^S)%`@Ks|q2=sWFm!%!qanoW?orMk0^_-khVx%Zj8)o~}185e># z!h88+_V5z~zbgC+p@E-fy#VkUR$6`6eXhrew(=CAal!I9nqJ>B(7m!ep@Vo}XbZ0#$ ztaEA~LO;u3h68ZK-=;#Rs{ZlIv!g`dCL3NhaNaDNSDXgPYS4*3SU>_`5$*d-hbG0| zSA2m!r35Ug;Me=J)l;=Tsverluk7WCgTli0mZwXN%~2{?c~08{i$L%qW(bv@nE34v z$@l7)|DbUz;I8So?2VPx61c_(^L~fCg6KpRk)QozpkQ`$XsGQ(ry+^lkfITKE6>o|&9n{ALdL zN|+lX7`tYGf|I6Y+*Sz)7#$w`_IcJb-WMjemvCuByC7T2x%Eg)Esm zTuezyi)-w0A-xTXIlp#!5RGFI^1s)Fxp!{o)N9dmU;ftot;EVZ+k-J5@L`T+#tg!0 zulEx)bts7C{0!Lx)Y@cY;G%e{FC_&DcMeg?P`4i##q=L@FRy}+Vaa0|`3?em@m(I3 z0fpsD2ZrO^ENlpGK3M@)|7xwxRBlfSAFwB&tH1tm1)~X1|HU5ZgsKke9Y8%FPq*3u z3_P1o55_IS+0yAqIdV@6-t>;r_;@N7ZmdxgCv>L5QVv`?Z9gbQEly>-w?{@!ZT739AEU%ZLjZ;+<_VmK zuisYNzJGk8=irGGD|DvHtn8nhPOwKk^8aVr$C?0;7!$)S(pA(>W{;v_Zk2oE{*;~f%26M4C0ewA?9gq z_V4ceS8Lj5%n$gyEYkw^xgLMp7hw!j)ufm|y6G)>3wJGsay%wYQX<^VQaoyihkIl z4jU(v|BfV9ZNa9nTIk0VakCN^{H~r_*vm?ceRAzb*Uw@%)80XLx{yM14^{!RwXPPD z=BWlD?ho8Fn!`QL*EClz90VU>dJkv5t<>gyt1<4h$`NvBFJ!u8i44y17xhuv9*F63 zjRcE2a=Pk*WaOCQnAW|HtB5c}Zq)kMuU^jm-;baETwqOBuD9f6DR3UG)L&fI(7@#3 z+SB>uH1RO{XzaXQ!7No7G&^)X9aWTOylX8e182n3emY;~l&jgu&CQLz4Yih-ETX{n zUwISo^>&T?9(Nh+{+2pb11@HXJP~eDVd2iK6Nxrw>KrLYn{Z;8QK6xa6>HP-sVMjl zKPh_7I(@?MzH&pyIiX)S3GwjgrMxRLoCLun3m>}g#r#{R>jtq35Z-Kdx_0T zi<4@#-lM1W-(fXB#gTCeTncy1qTgCCM9sh3?v_Lr0a63P`}(MGEjF_S=+tlafS-(^ zQ=!8~%`KfG)cx*PsA=}AHFcbY8A~*Wlwjm*Q7}6PJ>*`n#*&8ZKchZ~Z`F0yy(Kna zOs#CFPdAw{#6TgjIw{gel$&`-)C_l@2jHx0T1D0o#S34XqN;R`i&RP)B4;i=5&`aB zxp(~@|BEfnAudWIU6GD~p``m)_a*&&W2Ze|wp=zv0o&C}tSi-S^X*2g*qsi@y%PE9 zffPmuDoWx%@oWu8Rk)5HJAp8f&!a#|(d!t{k?!ITv`ph}-4ZKXx*5NXTJkH)I6mDR zT3G05I7ce4q9E8FQlguo1>3wn-F^#0B^TZ;$?-d)6C7lH3WgKm{pUM44$H9U` zAKyDyvg@fl^h5Ev!K06Jag?OYU4lJxPbmcZ37Ga4G-V}HKi1KM#DhOghz zwbbxf&4Yq03zu+`sk-41uy<__2i~xIeq&UwrqzS zumSZ14#vszj>#su#l>-1St1)i?dWuw$P{SqRenN!LHpUPZ~~T~=H7&dpODMX!@x2} zdUWsO|7i}$z)K#0VzavjG&?1`xym#YrNpdU_3E4M=d;EfpZeb{_*7;n_`@^YYEOVP z1wIAx6b;HRM3~V^%RmBY{^(!DyHrwo*cCL1mDDb6d}1tUh(y9wQv1_ASjmb)iGewh?yyfLJKI^n>*k9Ao&V^s@5gg7 zGD|e78`%pfAQZ&M(d%%nLi-t?5Z^bFz!zjHi2f%~;_0?YTxANz*&%VB)`HFL*KmU2y-cXbMkjYW0I#Y zBwS2>CUV$086QsN2EZ`)dgf#3fmWUx>2X%x9Z`4$K6e(omJ2zA!4Rlk-|qp{_$OU5 zAV&HjPMW;IrQyu3s{;3I^J;fA$FSS&bk#-lq1JuUz}}ONiRm!h10@Cp&KCYPJ3HGi zj)kbYw&ho^Lm~;1ZZK4LUT$fig+X9l_?KETg78RC@%3b=4^f6bghfS#a(95Z z*A%l#RFvr@jWg?W*iSz4^vk7VcdjrHWq@tE#Rw)mV1DCgb--0TSaon+$JCjV|I~++ zs0*}|*`=I+tX`jDPmeqY(k$x_Ko-h9aGkJ{T}IOi1dDG z@6&^Q$K`+A}29%e7|X9N~&RxiW(OCU(f^MvbdtQ6q(wUcfGqc`P;5z)!XZH zO^J?vy$7SMxVPt+m5X7_?(9@J0)dgMdUo;62x1WBL6d(oo87;ZiuRkcS!-oe^=Z7V zywX2oA8pT$JQ@5W)LeRV9KCBwOF0kV+j)5;1Fi~&>AIx1JvD!-u4%`qm8y4oiB=}S zg&a@+)uPeMG-kfq4u$KC9f}==S3Y&5#r!hzghLyuwX-P~HI;L)LVkBOxa)W{S7YGb zH*VDC1K{GirX6i9@zlkz$&(bt1xPqNT8y)m``uaTgMc-d~^Hgw+WMt*0rK7XSY9Z!A)AZST z=#-ymCq$}dd-RDlD02KSx9yftS74kB+`uPvX&f8Ue8b#QHn9#sVZ0 zDn$sJ0XsJ#YT$+>Mb7LgwHT+j;tgC*;v-Hp2dZ2zyr{g4sEV4+NuJgS3@9SX{TE`8 z{eJbTzC-7u)x9;~)<|XIAg69M=&_cEHGKIZje|=cgOVa8My3dEb5&SopjXZ25DbwS z!p{jE2!U$m*~M3B8G22K3gtlPDHZa#FeyMgp683{|E=@sGHu*yd~0?}O`N2Zg2coO zLO5?YQMYSb^)L{Yq3 z@QTWgCOK-G5Sk9`W-ekoFx1zqiI}J4=9C;=Mv2yG+BM`O_i= z<=MVx*EuayrDRdTAZ6AjA8DSZ;g^u+fLmDK*9Lty^e~)#gusgdlPheE`SFj+`Yq&y~^5FS-G*|tVpP0@de zHa@Tu1n(Ws9|5w|Xg9d-Jjzz1V6C?Y=RnP+WT2$CPk^=b5sR zz2L*hP0BfbR!}IDD6J#V>cH26@FfpF!pQf|B!Xy)A3&fh5>a8dlM1=FhfY}u)A-?S z*Fz@6W4H6+360O%Abf_L(*Qn&hdH!=Cp{!0fBDyHK0#qaSv5d&OUwVDnCqpU*+p1w zx!fk`7JRk*mbF4CQU0(nMsN1{Nvitczc9h$)hLH9@A-}{*!T7ez&*jqdmJjXxH;Xu z6qSLo+9L*+d5{RFth8}%mv*<#$3IFOU4`Wx^;2rakr5nxdlOkgUT2xD&1S``WCqQR zON}npbHJcS!-AG5^-yqW0M&vV!v47Z*HbqkoO&dtdP*%rc}_~~cnrSg(IT3Pcs8=( ziRLBOj~U7gsmg*Ftp=a+#&rtR{Vae8L3q15EF03-T}2*4McqweTdvPzl`s1ziwSjr z8%BR0OIoKWjEl=^ii3mWJNJNE-j?SQ8MWF*Z5N>LiktxiQFMbOVPQmY`^7z^Dhm8VE|v-%H2zkP{L+bH{xCr@%+95} zLtDs}?qGZSfw#`q@Big7naDupC-uc=b5#amfJbiHG#_CvDS8C!ndw-lj8qo_0tr^{bU=3d~96aX}=6e`G^*?1Oe4fpj@jg z5dQa~gD}b6@M?N``e&0my@lSTmY-X3Myi9~5P~*3O7~>mm6R0(#_S0Z0M&M}(`9~w z^|zapx<&WvUB*%I*)KY!VpL#G#-?K>m;bhvgIcA0r?7T%soxjP5(Y0&Z3~0D1GUxR zfZ)9Q*_dLw>=O_(Lry#*oFi9&@Y((?om}(3f9+bp$Ze*se#pJSBq)S)EiGcx3H5 zZ6O=NOp5n#D;!!-J`@IXzLZb!54XaZ3zE5%3!THi26)rh()YD$jH|x%wY{&2mxTPv zA=+f_0yurAIlw3CHhiYwG<@EW9wj(1smc5qapViEh=^K&xtwQj0KB^+BS>|5wBQDu zQD{d-f<9^e?gvkY5ge&?S>*NhRQGt5t7Ui8#G~NiB_jVcrGRm7$@9mO)cG87;nG6+ zb>VZB4o}a5X!YP5V1&8qk@JX&bxS)^JdB?szdLok7+Wwstq{ERaI^raS8>9))(wW2 zUeS#ath88Gkx%>5&w=b*)0IMwCg8mRWs)Vbx7(CjomoXrP&)fGBhHS~&Y^XwKbS<(3u(qY!b#29$Z6Sop?4xHpdMZTeGP*^FCQp1)*NlEUdx*}E#|$xh zKYqgg&ggrcJ)~q*Xw`Gxh~)&`o6Sv4_EV9mnOS{7J`61z>D61S31vm8g`|$OPbsyw zJZAM&VI)(WIF-U{pre1w25Fi6q&D{Bt>R$e8x^c%Dzxh3)qVF{FGr)u$QHgiRl|p=jOn8uSW7O7eHu0rV+6*)bt!5>!UZ7Kg#yh;YJ z8XIY1)1@YHuZ$FatxuyQPe2sX%r?f5n%uNMaf>_s6MNY%o^s1pU#`3+(mMP8?rzal ztrFf3Ev!;3-bi%nPW)~s)k7ywj~C>7)g$^_F~aztop56BPfE&8d|zR{`#uEIpq92y z6!(GVDPj(SFtBo8IQ`sb_r|}q-yd`ZUVR_#-5rUn;jg{g3=yUi5HRCzPfgpJ)%56_ z{4yh1!MNo~dZVyer>^ADu_{o`?|t_b)Wk@uc%Q&bMHkO3AWvBsLPC8T)vllp($vp) z?G*Dc{)2B$MYUk+#bf;qt}Skcc5SEtS25YR~<1cc&E7Jt>FeEYY?eg&9u3@}8>oJiPP zubX(PNXOiyLT;5~eDVZ>;X1LJugun#5|7#|n)GqlsQB&U#mYMB3gRJe_jTsl;MBL7 z=LfdY=%Yl6`QP!ql!S~vT6GQi*xjxkIzh>`?-pQVk0$b6al)mN~5{nwLmfCrXqF8;+lG?fxKf`~As;v!Ic7 ze~yzx{6E{DgNk|uJYQ{DnLiTl-~Z)S?e50~O9qP4-^4(EF``>Dy>ARDV-$ZfVVWAS zYAAnudjL|LHCWLSI85;L_Y85tQNMCD;%Qzl5Ze#h8$VY?nwJMczjnLQxN+z?R$|nl zLi>#N$4QTH;Axq_!tV(!tHNuuk*bP>uDjGErD@Kcj4zpPks zZ0%gH)0i{s-i~;AaYO zRC|rVPgSj}EoI#CQww@~VXzCp?H2Y+++xAhQX50h2wKIRE9W_rx>POLvH`NhJpe@5 zH}<{#S7Bbh<=i&mPpyT;nULBaXQFm%n}!SiY=72;dNSAsQ_e4ZAjwE1hj$$3jMrqp z92tS9m^a>qO(D-xdUJClHJ-uuZoc%}Cg`I56GS%}BJvWRqa-zO=1BOR&|UtmVT=w> zkUdFlGdTtO>kG!F>w-30{l%nMNp&wi2%{hzb&O$G$n?${2}3~==BeUhE#j1O5sO|xgzR_HTuawsHjKz506WHL!BE}9bK_ngm+MrOk9;W8YzJn^ zo(@rEvnHh9kPxz%1Qe`QK3zuADOi7CIEtoMia;vr-=RTJ^hen$>v?- zDC184Idpy$Cb`!v*ow1-K~lx}YC83|2K+uH2L!6C05cb50h(y09-OBJjSB>|mQ(KMq*wdO({?jKdUM{;BVqO)UGIgmGjmAiJI$rIRU&+PXlGAB1pT>U2rbV#f zD@D+bNITF%=s~z7IzD|Lp*06chK?vLkGNADlYXb6p&>{@~KkQdvC}GOnC- ztGENQNKVlR-A-}!=R!}(wi@-vA6N(vv}A-$AN=25Y=E4F-WWO~CU<1)l=;W?NSF}V zM);<(vNFqwe>KtoM0_iq-(X%XO8NKiUsub|>a*Pxb%~mS_MLhq*A!#FGR7TQjr&NJ zR6{=*BZ}6oD;C$3cpJ(|D6jV6TvI$q`NdA~lJR zA_7H>71-?7!IpKcu-xLb9R!@6%o!kK?3tfH%~kLqzj=@~aovXr)r_C2Ry1BDEUw%` z6;&y}f)<&$vy-6vxAT*3iOx48HVvKc@;32h> zwjJ>y@ie|%7sjmE64jYLl05G)M30`@e9~Ax(|=z$=uZ<}cIv zOhADGGYh658(97qC-W`K^`&p)g2BTly;n=HJE(ou8Be>FaF`Y#ylG28eC zr@jG+f8{!>lca$^(ub32y5HAK?^a^x%&4xPLs8GNEELB02yfVJ*8~_z*-xTMxT9sw z^IAr8uYEG+*X)Hj^lm1$d+L9|(A7pX$u=(} zFBm@fKr~*>TxRo#8@6w2>^iMbpF26%eN@hS5{P-Y+J0ZG4X!>L+S4ys571c@%0cWn zdww3I z0hj7;JTMS6?yMl;dAJ2q`*eN^5x4zS(UGcx*B?cl|6~}r#!4Kr>DTWl_(al^qsxC{ z60J`co&HJG(W7Ogql2_x zG(ya{)5hH@DXl;JucNc@YwGW#xO8_ng2YH^q+xV}G@~1-0n#u5>4qWvkWL8!K~h3W zx?7MKt#pp`xjy>~w!7CiKIgp8xwH(FjcwR?5WI*Ud5myV+igqL#nf*-%6kk{oeQz@ zb6hgopW=z|=1Rpn?!;=5+gg@dXG%o<-M)D7(*+7T(_dY-A_0%!G3%#D2G}U%*&-o> zsM~xl5laaY(3v7YUGvovz3M5^h(@!|Z|N-Zp0LQ747+OZl;X>g#WCrK$S`0Go9+Sd zcO6X~oxcjcgtU~h_5!z;Vi;&CCXv_8nyk2~4wGijbZv?4Ltv=&N;Xi8FVS)I?F*5A z7__neutv-3$B7hflp&4~Aaevxe*U{b*{&gM!-Md2>eL`c{0A`9AG*DuKQ(o#L4HL5 zlB6=)pNVCds=zwE2F$5f{83exa-g!ZW>KO3p~G=nTKzerZEw|sWdTvrr;U#mN%0t- zUbm^4fMkpusNvb+7mQ%DPtXCE2!eNt`Vk2OjWv=%O z_k1U}vbANo*AHLkp=R69d!0P>hSwtLJcbHwds>$~ZT7&RGa2`<5f#6vE_*4d`*vu6 zcUojt{?mIiSIzCki!BNq9jo_vMvUnEf-=aB$VFhkx|WcnWPYFJ< zGr)V@PFi`$tStbE8sw$Kq6OT>Mf;;9YeJIf0nV|Y4!#B8k71)#*GA({*F5}YzF!1v zn7bPQh%TDUBpdcMKpX+*l=<*I=*qK(>y9Ax2gdski(I_f3^)?lAJ&$>*mXbLbpMrB zmonf3B;E3_zbp5`9a&us9D{DUuF#-25ydR+b=F>+M@PWE-^KFgL}0pY)jEFCdP*{z zw$G{Ma?S*chp~_r)n}Hi4N}Wx&u)eP#q4vIjbqY-&+UQ!D_~rCU77pJhkXkO82tD5 z;X-F&ID^BSK{o6`0=t}*1=i%yWQ*Uq_HKC5phZ<&yCP$pgPO;e={me_bZ4&Z*YEBt z!-Ni}}`fX5NZAh8+v z{Fe@S2xB^p)HCO{RPFbr!UeM=<>)BG{q^2UpVna+zZZn%$retj23q746u`~^1;K)7 zRE!ZAXLf<%J{{m=%vkyP`QyTjpI?|PH8^Ns>4((7-l?!FTFSx0wxsg+AH;@6Mn;x9 zgZ=TYS=FaHF=AJav`b&g7kzYISDgG1%S!>lJF+X*6W!#*6=bJvT6Sa0NRGD@P;IM- z`-ZQn&2@afSN>3bLyeDl7y!sQ=|J_0UvjfkdU`spb1=X!Z>K%5DndUeHc(6X?*9gS z*{l9S^Sl*SK368G7(#~M%K))l*IyuihWP5zSP*hPyvdpE9g)YtZnAxYtsHn;A})7Y zI--qpOLo!Fla|HNBeSe%Q)U7r|aH1zZ4w z;LSPr{uvOe@SyJmS{nWUIMxT9u;0TunrsesT%4Tssib1Bv!%${!&eJS(bavan&GFK zK+22(c=nHP&+$Q_qRa%`6PeK;3(!iB<-!=zJvU1p&g0n zuum9B%gu4sP}lSYvJHEhnOrUy^VN$(8ft2wPlk=VXBvEM+b=;=1w;IX31Xlkkj&fu zNJL}-uX*!W==HMG-v5Q**qCC4IE03_#;arRbTy&@6&;4L;zdTm%$SteT*ODfw#z#1 zh9S`p!Ftc#AOSX+%HoqeUaV}r&D0;z{M!&2mV@ElQfM1tx)FsyDp5CNd^v?V@cyX9 zib*=?QoJTOEjxRtTmi9}xkh&b?}d#PWF)Zf|E_7`Z^>m z?(IN~gD#z@TYxjbn{mew&|SCg(~GEAIH#V*)_*%B`9m~1Dw%;un!1Q0p+SuzHg~bt z5W?;4os^Fl$xObKAUvrc-lxTeGlumMnb;k^peO|Z0-9o*cAv6@db#HrnH7yxfde*8 z0!8dxWVu8=uO7r|dxX5+kXv()hs>;>I~fM8HONu#MBF2uh^6opY1!F*8IMzfH}SA) zj-$)*Od}s9=w#Qb;SZe0 zuJ3WUl&Cp~@lBs=1O);VPI*F1iFWIbbn#N2--$0Z6r2AAXZETf0vFA^W|^6yQeTg> zGCkZYilS=hk%;P=9*~)Eb-C7d`J+Cf-}o#sKr0j>+F}a_c9SM+iV~r=1eWZ;dD8GV zipnxu%z3JiV*pGMQHQ9uPRQp@iLXZv8Ij#6fL$3>DXdCUsqmY6DVSrv#em~jI50S= z)+Vt`*h_Z3IySFc34*{6QIeFYyCUy8qFY*0WbXEgqc?^Gfq~QE{o&Lin zc>v)n^<=Ff+wfpP|0OrvaK`J6{ArgSJf7Z!RA0Uz&tQ9T6hmoEWq*4411YwriHk9X zW5)2ni92Z`k8K%?3P^JYMBgUa!ddp{f9?ZE&pUx|&Zx&#Ea8}|byUjl)!)^wRDV!} zjuZHu5Oe(i_y7Z_m~)Mi61$&l6cs3Z#u(?4$p;6W$!V}GJ7R;?D?^nzKxt5kF|18> zmX?$U8;8okPfEvT*S_IWrpj3(CDJL8=CXJ`DmGrU14d}win*dE@2&4WOs8=h522OF z8$ZUbri)}t5(_k-P$UHT&BreS0s;YMBJl0axN4<1@yocfM)2=iQM!h*#i59JRV@}; z96+H~Z?bO1R?Z;HG*;r2u~*=fnfno<#4lp-*(j6XTq&ci!D(OabZ;+;j#5o8&2T=8 z%p1MKg^HV93`E@Yz8HynK56=rEmL2Ky~!>6?Q6^-osQQ`;Lk`Is)^gyDtg>BI@0<} zfX2OXs4)r3&2B)l)Tu|tm0bRchT!p<@0(xb>IX*qyR^_`aPUL~jT3jwj)=<-{xGBP3o#pSK?U3j{*8EKw zQ21iGSYo_BZWVH-#^Lu8y3A!HT_%=a%%ryTs~A}bNOO&2>2A^6`@#7lr>AVNy?h`zl(TRvg$0f_P)IJ z5cf@@Fb^UesF9~=U=2LwtY%5dKUp3DM@`Zg~BF&9q>2Lzyz1?5SBa77Wfd)CMiZn2^yE}!S==F^U-qf z@+num>J%My&V`0?$JdWL$~@kPejKJzAsPH17;KIYS-Pc8S8pLkH2FI2D1JkP> zo8ual3y|^%CHCAMWvC)Cj8Q0LPin0$de_I;rb7pQ4$q?dqH1Yr>B>XR5P~65iox&{vc39c+w(VgQrWVbx$708qCb~k0BLn84hc*G zkOs%%hyDA0l}mlS;_O^-rrc0L7EIy?#=#!2%Z@6Ab&+CoC?^K?E07Wn4_Ar??E7tf zb0e!REhEJS7K9_z=mo$Bni_>vqXfSj+VJ8J2|eR zpJ#J#2wR~^IqH232vZHg&d#sFS2Vz!c=@6k=B?nV|RKa#Ft?IE|VU%!3@BPs>7fUz#w41*MBiP_LZ$oB+b8Rlm> z^t9ZzHrfBAxIc!+5*Z>$QdUYhG}!E`xu1I!ba+2+I)6TNF)k>0-3=T(Omm;jZ{D1J z&E{bD$w)_Js05Ur)#k;&pX>8Ae)NunTf_}ikUy!Crf@M!2J4iM9%)m;qLp42IJ?04 zshd$!#KxTB>^0{@L8E3ESEE_MaU+sX3(;bUSA}+Nk7B&`Q&(bC zzO-zWK2{g_aIAp7uEM%*d%c~^Ao(Qzr*F9>D5am{(EJyxO0sXIRrgcSG%sgi(M!wE zi$|fNu(N~skNZwbKZbx9LdPqQL*#Ug_ugYjF{uEyvSZrwju3Gy9_{S4RRuT~ePn3? z@v-KzYVt)Wgb1dqE6vwZt7K#}0mNN@*$S8^H;@^e0{vE|N>NdU(gTd4Un)#=HU$A@ zhKD51fi)Dj^VjCLi9djPS`FO#zx_J)$?fH?CRdkDn7eDHq{!sqz$vAPI>W-IDG4QF@Slc$tbe@ETWtF> zMZA!&B%W<8N>)lXNg8Bv0W=EIE!HvUe>M|ocKvAPIF^03i#&siDQ-W0KVSD=3)o8f z51@&s#Lt{o&jCbYk{Yzs^$&8A9c%V|dPK2lY za@Ru$pSa3H^6Q|y6xCYy+1jHCzF+O`|4F$2yjQoGE&Zz7Lk#ioPwzXZaco-VCOvxNN5OT`lx+SUUc)I)J$&zOh6mMS1xuaLvakXGB1b;{p z>+#vRy}kJYDP5mXfFr&aQ18?7A(O;+?pv8aolGh~OPoGP^Ne#0!8)+mY10?|W*;RI z^NQg2#n&7WU#H|exw^0=%JnZ>Dl6mJuYBevyB{Euylms@M)`wof(=cOjEvWA)FwbE zz|3CwKa1hYVbO+>L&t<=8}dXx`IpEJb&-!_)Ry|B<^75YnTO!vlYW-Q;rAjo7fH$` z<8GpR#+CXzg#jk4Ki&<90)$o!NUM`lV#gfhbrIuucB>N9DePf$Wit}sN|JvE*VpYs^ zMOcVNgpA*1Ym%#bUj3eD$Of~q+7Ap?ltJ5;gnwq*)(RydsgZ|B3N5B!^ z43L-x`7+WpHez}r2|BDlQ?EFVEwa5ZC~yKG-m+Tk8MQzVBoG*U-l)j-h1t+l*H<)~ zC8WO%K;g<{hKlDRS2bRT(C(-M?Vw3i+Q!)H3KeB#w>5zzb>!p}ru92`Em^V7U!ZA5 z1a65{gm$KrOj!G)Xqb`VBJKj(b-;b)V;2&U@Nqd@9Y2AoVzyGm5UB5DJ|#tyyS4EW zv~<)ZyBTsS2wnKEMsEH>?+2ab02SivYE9#RTtr4i&OoUe{tW8y$R|$ysj(rhGi9oP=oObA78hXk-1q7+e`J_;w&-eXd_Rx@Xva z$lxKwRWAZ0Gjlh?hXOR00TeQ=Q@}brr=?xW&iI2^4?u&;M6y$Ekh#C?@r*m|lHv;p zGx3)qocVG0afc?XAaC@0U8u;EfBj5UaBKby`cks3`=I2;p=Y8$Mpe=?hP%z_mN^P#P8G; z>uO*-7%m4;6GyM$)o|m%vq)!`H_mX$A~TQd4;rgUZ$7yMwVH&mR%1TV(84X1FUDkz zU{D*gAo_CT(mhUy2Y2Q()slQcyA3>H;ok4|f|X|V7OqF{MjCdNJGtu~XcPP{aG!SfPt}I)#K4( zjGK@G&%K(EJoyOM21;E@hV0@T#T#}X#mN0{eWF3I1FSmk%A6ih7i z>C-i)wbsO31D^ttM({vg>O!ipvHfYCU%@Be-+B;H{+0jB0FphhmL?UvcK+j@B{Jds z;ePG@q~mF$n=#=1D`#DDz46m|V=L*O)yDi|VGxBvvrHz>nNqdmwa(y=jZ9E<>6W;h zXY5rZWG+mz4IiBo0x?V-tQ(iB>dBNKzK~U3jM|>_baIYt`F?Oy!>GORodG7Me7#=c zt=~i})MUg0{i#>>j05~|^p-3mwu+hPm(`WT^Rs>colyZ|TnFxlB^mjM=(a43oc1Y{ z4o8^d2>ewBhfbH(1IOkvJ1Bnq%bW80LR?D(1 z+m)n><>8gcZPms4XE^X%Yh@<~2Zxi@wnnzqTy3YO;^JZnqv|FzKtHFrb8nEOr|b~n zYoKJ?YMDJC1tJ_9-r^XQDL}klCPC|8Vg<(&vCkOft0_rO*sVp;UZ<0!edDw)jCzc9 zEs#p_glJo=F`doIlhX~z7TgO3`*!)Oycf)AHKq+CsKqiN;pvH_Ii0iS+gnFX?MvK5V*hr3`Z3;G zmgV6qSg1|UEfNF%LnbT>1H`*ax4T%vp3N;HGVchCAwY^-Lc<&@s^ld=(8o64s;Q|d zZ9l2ZR~e@0nh~^JOh{HJ96VpIGz`fA3Uo>Uers3e%&BZMMfPt8biQgnwCmPSBf}2H zRM-A?WkHm;QZr|qa&f9CQk0NJ3JDs?q8j_YZEX|(Zv!Or0D=fviei}j{q${!zPSbJ z$6ufHN>&HiHIcRhK#fzR)kjhH--*1&RP5SlgJ1l477QAN z9SET?P6o)lry7-Mw;-oVSNZ|P4?7j#P^YvyI6W8&z}I-Zh!K7&-D_wq_BxHnyjRs| z?IY;cbJ?J+(lv!qbn)&$Ehwqt5g7`f(+#+37|9nSniC^n!1glqLrSO-RdH}LOE^+U zwjXr>mq}2HOd?u#TE~*O6eEIla^u_!3dcny!h2A%@os}0N_9=1V96$7*2xd!;y>=j ze|wRLaKH7e)JJ=ec(xMWd=W%p$aG6&RQ*K{D36Dw8R2fw|Viue6?|eB^t3Jw5u2duo*c)Zv z>D@l$Mg650jk{NuaLK3;@bht_-WA|!XmPDFM4_|1-Fs3{?*-qVZ++MLzzM_q!Anrk zi&2d=`!{y7?_?Ciu6p9HUt?5O8R|}7R3MeyyGz4ki-k}44r8>z3+!5fjslqCvNkNS2`W+@jitE{%Y=v5kpeav`{WyGDIG6u<$TlBU` zldIoj2E}2;b+;N1-@aJQO9EB>VqDR_V~cD|7J)FDH0-d)wWOH6b&&y^N2OC#lL;Enuj+L^HCP4WYmhJ`aB*?jTjGysNNS~eWr9-j*`U=nma5eoeI}GSROT?|-0k5GFHK8tn@lp}F zI0sNz0q$)>om%%UvCs6>C-l=S=4|cIxlek3RmRuUr6=^j44P+p?4XB9pGiJ-&((eA z=z>BpWG$v&@)s1p_cOzqReze90;FTZNdJrx&+LV9G|LVFMw;5E_ralG^)y*w5pQ1T z=NAShgvptNFi84I2?O1rYCxYTfwCH72e8@;Iya-~2<@0W^+bdb6qjf6w?KY2a0O^S zXClrM5mvX}LW%>}F~Pq~5yrn*2HhHH40N^P^VDw5OsBVcp$`#sW)*)q)y%R{1UCl- z_|W3le+UnuFa7xk4W*ikKBALI<>+zypn~rz66=;+YZd6X5xq~~?+Pe%9MAdI52mGM zL&Epw=H~kTl@P|dc>ZfkZAGOB1lmcv?*ZudWEP&UY>G4q$^BPi{x3sx0t8pulJ@)W z&Ptn_S8dbGeZAG!2U;eG?4d)v6M&M%HR>%ATVRROWldTO8YSIa9NtR!yGuz7OP0II zitvGke(5Jv-s>QjS@_H|RZxcMA|7C}Gr5~$ov#t^Ta!N{%6u6z7ra|t;JClt%ZCj)3%hPz3y&!3ouPdDW$Dx(OXA~yxTJS7RRlip%HZlYF%EG) z=InU{RUeodi3dmx_w~RoD)=>^m=XRA@V7A7WSue;3M(}iS@hZ78+qJ;?{v=uJ`rmw z=Y5=+OX8PFZC>SuDBoLnbOo*e5v9lJe<7rE#;GaJkAf6TPCpW9JVSv_$2=l>@$cnb z@|X_#RVnIu1Dj`g?jYuH+-UkI$jqOO==CLUJUr$X;p9{|9J(RUQcCLXAL~sIDP+c_ z3>aEl)K+=JS<(YdcqR%s3Y590zM4^H#Az|~+hM#Gjr*3^->jmnNJ&9orC<6~+kYTlx$?kDpJW3f?cWihqAO((1xTd|` z{y`|Vd-;3$Vym>YlojDZODj(TM{GSP;GSmaWL-T5U6iu}_vmg*9Yela73CkfPtSxh zgk5IryMa;-(=o@a<@9}FTuhyL*LgfOnl`|PIYQ>8=ejn%Fr^0#gN(-d9EmSI$o)q8 zjea(?ZFtF0y!^(D&Bui_0t%$z9*R1+_3vLS422BlSLSu>*ngKCWZI zeM_0{gM68b`RS7ej{O8Jb#~LT&z@K;3?;$_S39!O-ILyE=Sb$iq`*4_9yoR~IdR#; zDO8u)(iJrO7l5}AIKN?!id$#Ta@*Z%t?J`~D@W Yf1tfH-Mc>rUajy3;re+?^ZRsaA1 From ad874a953b6275c9cb312cdb4c576407c1d7d4db Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Thu, 22 Aug 2024 15:29:17 -0700 Subject: [PATCH 072/163] update --- .../workflow/operators/hashJoin/HashJoinBuildOpExec.scala | 2 +- .../ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala | 2 +- .../workflow/operators/hashJoin/HashJoinProbeOpExec.scala | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala index e1042b60fa7..3c8d2bda78d 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinBuildOpExec.scala @@ -31,4 +31,4 @@ class HashJoinBuildOpExec[K](buildAttributeName: String) extends OperatorExecuto override def close(): Unit = { buildTableHashMap.clear() } -} \ No newline at end of file +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala index 51dd37ac962..c4e315fd88d 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinOpDesc.scala @@ -177,4 +177,4 @@ class HashJoinOpDesc[K] extends LogicalOp { } builder.build() } -} \ No newline at end of file +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala index 8fa1f9efd6b..c6a5e0bef85 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/hashJoin/HashJoinProbeOpExec.scala @@ -124,4 +124,4 @@ class HashJoinProbeOpExec[K]( buildTableHashMap.clear() } -} \ No newline at end of file +} From 770deba9e889b90b8a08c2b19459b0ee91f2d5b7 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Thu, 22 Aug 2024 15:33:05 -0700 Subject: [PATCH 073/163] update --- .../scala/edu/uci/ics/amber/engine/common/SerializedState.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SerializedState.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SerializedState.scala index 0ba1d11b864..d0f28804a5c 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SerializedState.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/common/SerializedState.scala @@ -27,4 +27,4 @@ case class SerializedState(bytes: Array[Byte], serializerId: Int, manifest: Stri def size(): Long = { bytes.length } -} \ No newline at end of file +} From fc388081bcbca5bdf138d71d449fb3e5f8ff48a0 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Thu, 22 Aug 2024 16:04:14 -0700 Subject: [PATCH 074/163] update --- .../uci/ics/texera/workflow/common/operators/LogicalOp.scala | 5 ----- 1 file changed, 5 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala index 7984f5dbf7c..a4a770f9f08 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala @@ -50,7 +50,6 @@ import edu.uci.ics.texera.workflow.operators.source.sql.mysql.MySQLSourceOpDesc import edu.uci.ics.texera.workflow.operators.source.sql.postgresql.PostgreSQLSourceOpDesc import edu.uci.ics.texera.workflow.operators.split.SplitOpDesc import edu.uci.ics.texera.workflow.operators.symmetricDifference.SymmetricDifferenceOpDesc -import edu.uci.ics.texera.workflow.operators.state.{DataToStateOpDesc, DualInputStateReceiverOpDesc, StateReceiverOpDesc, StateToDataOpDesc} import edu.uci.ics.texera.workflow.operators.typecasting.TypeCastingOpDesc import edu.uci.ics.texera.workflow.operators.udf.java.JavaUDFOpDesc import edu.uci.ics.texera.workflow.operators.udf.python.source.PythonUDFSourceOpDescV2 @@ -147,10 +146,6 @@ trait StateTransferFunc new Type(value = classOf[AsterixDBSourceOpDesc], name = "AsterixDBSource"), new Type(value = classOf[TypeCastingOpDesc], name = "TypeCasting"), new Type(value = classOf[LimitOpDesc], name = "Limit"), - new Type(value = classOf[DataToStateOpDesc], name = "DataToState"), - new Type(value = classOf[StateToDataOpDesc], name = "StateToData"), - new Type(value = classOf[StateReceiverOpDesc], name = "TestingStateReceiver"), - new Type(value = classOf[DualInputStateReceiverOpDesc], name = "DualInputTestingStateReceiver"), new Type(value = classOf[RandomKSamplingOpDesc], name = "RandomKSampling"), new Type(value = classOf[ReservoirSamplingOpDesc], name = "ReservoirSampling"), new Type(value = classOf[HashJoinOpDesc[String]], name = "HashJoin"), From 64197e2b6a7f53a02e0b1a5291d1de3c7f7fde14 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 4 Sep 2024 17:27:39 -0700 Subject: [PATCH 075/163] update --- core/amber/src/main/python/core/models/payload.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/amber/src/main/python/core/models/payload.py b/core/amber/src/main/python/core/models/payload.py index 48ae73ee818..9bc4994f8d6 100644 --- a/core/amber/src/main/python/core/models/payload.py +++ b/core/amber/src/main/python/core/models/payload.py @@ -1,5 +1,7 @@ from dataclasses import dataclass + from pyarrow.lib import Table + from core.models.marker import Marker @@ -7,10 +9,12 @@ class DataPayload: pass + @dataclass class DataFrame(DataPayload): frame: Table + @dataclass class MarkerFrame(DataPayload): frame: Marker From b6365468c5e6981d5082d8f7440ba8dfb31fd5af Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 4 Sep 2024 17:28:39 -0700 Subject: [PATCH 076/163] update --- core/amber/src/main/python/core/runnables/data_processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/amber/src/main/python/core/runnables/data_processor.py b/core/amber/src/main/python/core/runnables/data_processor.py index 8c003a18ab8..ab8c990c483 100644 --- a/core/amber/src/main/python/core/runnables/data_processor.py +++ b/core/amber/src/main/python/core/runnables/data_processor.py @@ -102,12 +102,12 @@ def process_tuple(self) -> None: finally: self._switch_context() - def _set_output_tuple(self, output_tuple): + def _set_output_tuple(self, output_tuple) -> None: if output_tuple is not None: output_tuple.finalize(self._context.output_manager.get_port().get_schema()) self._context.tuple_processing_manager.current_output_tuple = output_tuple - def _set_output_state(self, output_state: State): + def _set_output_state(self, output_state: State) -> None: self._context.tuple_processing_manager.current_output_state = output_state def _switch_context(self) -> None: From 7750b7fdded1457f9bf0f03a4c5b768591c99a00 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 4 Sep 2024 17:31:23 -0700 Subject: [PATCH 077/163] update --- .../workflow/common/operators/LogicalOp.scala | 42 +++++++++++++++++-- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala index a4a770f9f08..a3e6fa1b18a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala @@ -34,11 +34,42 @@ import edu.uci.ics.texera.workflow.operators.regex.RegexOpDesc import edu.uci.ics.texera.workflow.operators.reservoirsampling.ReservoirSamplingOpDesc import edu.uci.ics.texera.workflow.operators.sentiment.SentimentAnalysisOpDesc import edu.uci.ics.texera.workflow.operators.sink.managed.ProgressiveSinkOpDesc -import edu.uci.ics.texera.workflow.operators.sklearn.{SklearnAdaptiveBoostingOpDesc, SklearnBaggingOpDesc, SklearnBernoulliNaiveBayesOpDesc, SklearnComplementNaiveBayesOpDesc, SklearnDecisionTreeOpDesc, SklearnDummyClassifierOpDesc, SklearnExtraTreeOpDesc, SklearnExtraTreesOpDesc, SklearnGaussianNaiveBayesOpDesc, SklearnGradientBoostingOpDesc, SklearnKNNOpDesc, SklearnLinearRegressionOpDesc, SklearnLinearSVMOpDesc, SklearnLogisticRegressionCVOpDesc, SklearnLogisticRegressionOpDesc, SklearnMultiLayerPerceptronOpDesc, SklearnMultinomialNaiveBayesOpDesc, SklearnNearestCentroidOpDesc, SklearnPassiveAggressiveOpDesc, SklearnPerceptronOpDesc, SklearnPredictionOpDesc, SklearnProbabilityCalibrationOpDesc, SklearnRandomForestOpDesc, SklearnRidgeCVOpDesc, SklearnRidgeOpDesc, SklearnSDGOpDesc, SklearnSVMOpDesc} +import edu.uci.ics.texera.workflow.operators.sklearn.{ + SklearnAdaptiveBoostingOpDesc, + SklearnBaggingOpDesc, + SklearnBernoulliNaiveBayesOpDesc, + SklearnComplementNaiveBayesOpDesc, + SklearnDecisionTreeOpDesc, + SklearnDummyClassifierOpDesc, + SklearnExtraTreeOpDesc, + SklearnExtraTreesOpDesc, + SklearnGaussianNaiveBayesOpDesc, + SklearnGradientBoostingOpDesc, + SklearnKNNOpDesc, + SklearnLinearRegressionOpDesc, + SklearnLinearSVMOpDesc, + SklearnLogisticRegressionCVOpDesc, + SklearnLogisticRegressionOpDesc, + SklearnMultiLayerPerceptronOpDesc, + SklearnMultinomialNaiveBayesOpDesc, + SklearnNearestCentroidOpDesc, + SklearnPassiveAggressiveOpDesc, + SklearnPerceptronOpDesc, + SklearnPredictionOpDesc, + SklearnProbabilityCalibrationOpDesc, + SklearnRandomForestOpDesc, + SklearnRidgeCVOpDesc, + SklearnRidgeOpDesc, + SklearnSDGOpDesc, + SklearnSVMOpDesc +} import edu.uci.ics.texera.workflow.operators.sort.SortOpDesc import edu.uci.ics.texera.workflow.operators.sortPartitions.SortPartitionsOpDesc import edu.uci.ics.texera.workflow.operators.source.apis.reddit.RedditSearchSourceOpDesc -import edu.uci.ics.texera.workflow.operators.source.apis.twitter.v2.{TwitterFullArchiveSearchSourceOpDesc, TwitterSearchSourceOpDesc} +import edu.uci.ics.texera.workflow.operators.source.apis.twitter.v2.{ + TwitterFullArchiveSearchSourceOpDesc, + TwitterSearchSourceOpDesc +} import edu.uci.ics.texera.workflow.operators.source.fetcher.URLFetcherOpDesc import edu.uci.ics.texera.workflow.operators.source.scan.FileScanSourceOpDesc import edu.uci.ics.texera.workflow.operators.source.scan.csv.CSVScanSourceOpDesc @@ -53,7 +84,12 @@ import edu.uci.ics.texera.workflow.operators.symmetricDifference.SymmetricDiffer import edu.uci.ics.texera.workflow.operators.typecasting.TypeCastingOpDesc import edu.uci.ics.texera.workflow.operators.udf.java.JavaUDFOpDesc import edu.uci.ics.texera.workflow.operators.udf.python.source.PythonUDFSourceOpDescV2 -import edu.uci.ics.texera.workflow.operators.udf.python.{DualInputPortsPythonUDFOpDescV2, PythonLambdaFunctionOpDesc, PythonTableReducerOpDesc, PythonUDFOpDescV2} +import edu.uci.ics.texera.workflow.operators.udf.python.{ + DualInputPortsPythonUDFOpDescV2, + PythonLambdaFunctionOpDesc, + PythonTableReducerOpDesc, + PythonUDFOpDescV2 +} import edu.uci.ics.texera.workflow.operators.udf.r.{RUDFOpDesc, RUDFSourceOpDesc} import edu.uci.ics.texera.workflow.operators.union.UnionOpDesc import edu.uci.ics.texera.workflow.operators.unneststring.UnnestStringOpDesc From ab93577b36956a68dde28ce14a44a831188e3e16 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 4 Sep 2024 17:32:08 -0700 Subject: [PATCH 078/163] update --- core/amber/src/main/python/core/runnables/data_processor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/amber/src/main/python/core/runnables/data_processor.py b/core/amber/src/main/python/core/runnables/data_processor.py index ab8c990c483..101b6f0a6db 100644 --- a/core/amber/src/main/python/core/runnables/data_processor.py +++ b/core/amber/src/main/python/core/runnables/data_processor.py @@ -34,8 +34,8 @@ def run(self) -> None: self._switch_context() def process_state(self) -> None: - state_ = self._context.tuple_processing_manager.get_input_state() - if state_ is not None: + state = self._context.tuple_processing_manager.get_input_state() + if state is not None: try: executor = self._context.executor_manager.executor port_id = self._context.tuple_processing_manager.current_input_port_id @@ -50,7 +50,7 @@ def process_state(self) -> None: self._context.worker_id, self._context.console_message_manager.print_buf, ): - self._set_output_state(executor.process_state(state_, port)) + self._set_output_state(executor.process_state(state, port)) except Exception as err: logger.exception(err) From 86282de8d81f0a7f30ca56426645ee44aa0843cf Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 4 Sep 2024 18:42:33 -0700 Subject: [PATCH 079/163] update --- .../src/main/python/core/models/marker.py | 23 +++++++++++-------- .../python/core/runnables/network_receiver.py | 2 +- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/core/amber/src/main/python/core/models/marker.py b/core/amber/src/main/python/core/models/marker.py index d04673b6378..b241e5c31e7 100644 --- a/core/amber/src/main/python/core/models/marker.py +++ b/core/amber/src/main/python/core/models/marker.py @@ -15,25 +15,28 @@ class State(Marker): def __init__(self): self.data = {} - def add(self, key, value): + def add(self, key: str, value: any) -> None: self.data[key] = value - def get(self, key): + def get(self, key: str) -> any: return self.data[key] - def to_table(self): - return Table.from_pandas(df=DataFrame([self.data])) - - def from_dict(self, dictionary): + def from_dict(self, dictionary: dict) -> "State": for key, value in dictionary.items(): self.add(key, value) return self - def __setitem__(self, key, value): - self.data[key] = value + def to_table(self) -> Table: + return Table.from_pandas(df=DataFrame([self.data])) - def __getitem__(self, key): - return self.data[key] + def from_table(self, table: Table) -> "State": + return self.from_dict(table.to_pandas().iloc[0].to_dict()) + + def __setitem__(self, key: str, value: any): + self.add(key, value) + + def __getitem__(self, key: str) -> any: + return self.get(key) def __str__(self) -> str: content = ", ".join( diff --git a/core/amber/src/main/python/core/runnables/network_receiver.py b/core/amber/src/main/python/core/runnables/network_receiver.py index 686223eeacc..43a16583e3b 100644 --- a/core/amber/src/main/python/core/runnables/network_receiver.py +++ b/core/amber/src/main/python/core/runnables/network_receiver.py @@ -67,7 +67,7 @@ def data_handler(command: bytes, table: Table) -> int: if payload_type == "Data": payload = DataFrame(table) elif payload_type == "State": - payload = MarkerFrame(State().from_dict(table.to_pandas().iloc[0].to_dict())) + payload = MarkerFrame(State().from_table(table)) elif payload_type == "EndOfUpstream": payload = MarkerFrame(EndOfUpstream()) shared_queue.put( From 4fb386cb891d20de6cf21485e884efb4fa7cc2f5 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Fri, 6 Sep 2024 23:18:35 -0700 Subject: [PATCH 080/163] update --- core/amber/src/main/python/core/models/operator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/amber/src/main/python/core/models/operator.py b/core/amber/src/main/python/core/models/operator.py index 5f63f49e807..3a6a9c4a019 100644 --- a/core/amber/src/main/python/core/models/operator.py +++ b/core/amber/src/main/python/core/models/operator.py @@ -48,15 +48,15 @@ def close(self) -> None: """ pass - def process_state(self, state_: State, port: int) -> State: + def process_state(self, state: State, port: int) -> State: """ Process an input State from the given link. - :param state_: State, a State from an input port to be processed. + :param state: State, a State from an input port to be processed. :param port: int, input port index of the current exhausted port. :return: State, producing one State object """ - return state_ + return state def produce_state(self, port: int) -> State: """ From 693ac00c76cd724a013216523ba61a68439f22ff Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sat, 7 Sep 2024 01:07:36 -0700 Subject: [PATCH 081/163] update --- .../ics/texera/workflow/common/operators/OperatorExecutor.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala index 28b5dc98509..2c79feb9ecf 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala @@ -20,7 +20,7 @@ trait OperatorExecutor { def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] - def onFinishProduceState(port: Int): State = null + def onFinishProduceState(port: Int): Option[State] = None def onFinishMultiPort(port: Int): Iterator[(TupleLike, Option[PortIdentity])] = { onFinish(port).map(t => (t, None)) From 823919d579c0a482d1884a3465619c23cb2b138d Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sat, 7 Sep 2024 01:09:44 -0700 Subject: [PATCH 082/163] update --- .../ics/amber/engine/architecture/worker/DataProcessor.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index d7b89ae9129..a2fef62d36f 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -94,8 +94,8 @@ class DataProcessor( executor.onFinishMultiPort(portId) ) val outputState = executor.onFinishProduceState(portId) - if (outputState!= null) { - outputManager.emitMarker(outputState) + if (outputState.isDefined) { + outputManager.emitMarker(outputState.get) } } catch safely { case e => From 436e11366c7fe2781fca248cfa57e3d8593f98d3 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sat, 7 Sep 2024 05:29:23 -0700 Subject: [PATCH 083/163] update --- .../core/architecture/sendsemantics/one_to_one_partitioner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py index 5a788d89c23..c3e405e6d88 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py @@ -38,7 +38,7 @@ def add_tuple_to_batch( def add_state_to_batch(self, state: State): if len(self.batch) > 0: yield self.receiver, deepcopy(self.batch) - self.batch.clear() + self.reset() yield self.receiver, state From 4a606e498f7ea95c4d11635b69a5ed3ea09afc88 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sun, 8 Sep 2024 16:34:11 -0700 Subject: [PATCH 084/163] update --- .../workflow/common/operators/LogicalOp.scala | 47 +++----------- .../operators/state/DataToStateOpDesc.scala | 46 ++++++++++++++ .../operators/state/DataToStateOpExec.scala | 28 +++++++++ .../state/DualInputStateReceiverOpDesc.scala | 42 +++++++++++++ .../state/DualInputStateReceiverOpExec.scala | 26 ++++++++ .../operators/state/StateReceiverOpDesc.scala | 42 +++++++++++++ .../operators/state/StateReceiverOpExec.scala | 18 ++++++ .../operators/state/StateToDataOpDesc.scala | 62 +++++++++++++++++++ .../operators/state/StateToDataOpExec.scala | 32 ++++++++++ 9 files changed, 304 insertions(+), 39 deletions(-) create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpExec.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpExec.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala index a3e6fa1b18a..60f5210feb5 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala @@ -34,42 +34,11 @@ import edu.uci.ics.texera.workflow.operators.regex.RegexOpDesc import edu.uci.ics.texera.workflow.operators.reservoirsampling.ReservoirSamplingOpDesc import edu.uci.ics.texera.workflow.operators.sentiment.SentimentAnalysisOpDesc import edu.uci.ics.texera.workflow.operators.sink.managed.ProgressiveSinkOpDesc -import edu.uci.ics.texera.workflow.operators.sklearn.{ - SklearnAdaptiveBoostingOpDesc, - SklearnBaggingOpDesc, - SklearnBernoulliNaiveBayesOpDesc, - SklearnComplementNaiveBayesOpDesc, - SklearnDecisionTreeOpDesc, - SklearnDummyClassifierOpDesc, - SklearnExtraTreeOpDesc, - SklearnExtraTreesOpDesc, - SklearnGaussianNaiveBayesOpDesc, - SklearnGradientBoostingOpDesc, - SklearnKNNOpDesc, - SklearnLinearRegressionOpDesc, - SklearnLinearSVMOpDesc, - SklearnLogisticRegressionCVOpDesc, - SklearnLogisticRegressionOpDesc, - SklearnMultiLayerPerceptronOpDesc, - SklearnMultinomialNaiveBayesOpDesc, - SklearnNearestCentroidOpDesc, - SklearnPassiveAggressiveOpDesc, - SklearnPerceptronOpDesc, - SklearnPredictionOpDesc, - SklearnProbabilityCalibrationOpDesc, - SklearnRandomForestOpDesc, - SklearnRidgeCVOpDesc, - SklearnRidgeOpDesc, - SklearnSDGOpDesc, - SklearnSVMOpDesc -} +import edu.uci.ics.texera.workflow.operators.sklearn.{SklearnAdaptiveBoostingOpDesc, SklearnBaggingOpDesc, SklearnBernoulliNaiveBayesOpDesc, SklearnComplementNaiveBayesOpDesc, SklearnDecisionTreeOpDesc, SklearnDummyClassifierOpDesc, SklearnExtraTreeOpDesc, SklearnExtraTreesOpDesc, SklearnGaussianNaiveBayesOpDesc, SklearnGradientBoostingOpDesc, SklearnKNNOpDesc, SklearnLinearRegressionOpDesc, SklearnLinearSVMOpDesc, SklearnLogisticRegressionCVOpDesc, SklearnLogisticRegressionOpDesc, SklearnMultiLayerPerceptronOpDesc, SklearnMultinomialNaiveBayesOpDesc, SklearnNearestCentroidOpDesc, SklearnPassiveAggressiveOpDesc, SklearnPerceptronOpDesc, SklearnPredictionOpDesc, SklearnProbabilityCalibrationOpDesc, SklearnRandomForestOpDesc, SklearnRidgeCVOpDesc, SklearnRidgeOpDesc, SklearnSDGOpDesc, SklearnSVMOpDesc} import edu.uci.ics.texera.workflow.operators.sort.SortOpDesc import edu.uci.ics.texera.workflow.operators.sortPartitions.SortPartitionsOpDesc import edu.uci.ics.texera.workflow.operators.source.apis.reddit.RedditSearchSourceOpDesc -import edu.uci.ics.texera.workflow.operators.source.apis.twitter.v2.{ - TwitterFullArchiveSearchSourceOpDesc, - TwitterSearchSourceOpDesc -} +import edu.uci.ics.texera.workflow.operators.source.apis.twitter.v2.{TwitterFullArchiveSearchSourceOpDesc, TwitterSearchSourceOpDesc} import edu.uci.ics.texera.workflow.operators.source.fetcher.URLFetcherOpDesc import edu.uci.ics.texera.workflow.operators.source.scan.FileScanSourceOpDesc import edu.uci.ics.texera.workflow.operators.source.scan.csv.CSVScanSourceOpDesc @@ -84,12 +53,7 @@ import edu.uci.ics.texera.workflow.operators.symmetricDifference.SymmetricDiffer import edu.uci.ics.texera.workflow.operators.typecasting.TypeCastingOpDesc import edu.uci.ics.texera.workflow.operators.udf.java.JavaUDFOpDesc import edu.uci.ics.texera.workflow.operators.udf.python.source.PythonUDFSourceOpDescV2 -import edu.uci.ics.texera.workflow.operators.udf.python.{ - DualInputPortsPythonUDFOpDescV2, - PythonLambdaFunctionOpDesc, - PythonTableReducerOpDesc, - PythonUDFOpDescV2 -} +import edu.uci.ics.texera.workflow.operators.udf.python.{DualInputPortsPythonUDFOpDescV2, PythonLambdaFunctionOpDesc, PythonTableReducerOpDesc, PythonUDFOpDescV2} import edu.uci.ics.texera.workflow.operators.udf.r.{RUDFOpDesc, RUDFSourceOpDesc} import edu.uci.ics.texera.workflow.operators.union.UnionOpDesc import edu.uci.ics.texera.workflow.operators.unneststring.UnnestStringOpDesc @@ -121,6 +85,7 @@ import edu.uci.ics.texera.workflow.operators.visualization.ternaryPlot.TernaryPl import org.apache.commons.lang3.builder.{EqualsBuilder, HashCodeBuilder, ToStringBuilder} import org.apache.zookeeper.KeeperException.UnimplementedException import edu.uci.ics.texera.workflow.operators.machineLearning.Scorer.MachineLearningScorerOpDesc +import edu.uci.ics.texera.workflow.operators.state.{DataToStateOpDesc, DualInputStateReceiverOpDesc, StateReceiverOpDesc, StateToDataOpDesc} import edu.uci.ics.texera.workflow.operators.visualization.quiverPlot.QuiverPlotOpDesc import edu.uci.ics.texera.workflow.operators.visualization.contourPlot.ContourPlotOpDesc import edu.uci.ics.texera.workflow.operators.visualization.figureFactoryTable.FigureFactoryTableOpDesc @@ -182,6 +147,10 @@ trait StateTransferFunc new Type(value = classOf[AsterixDBSourceOpDesc], name = "AsterixDBSource"), new Type(value = classOf[TypeCastingOpDesc], name = "TypeCasting"), new Type(value = classOf[LimitOpDesc], name = "Limit"), + new Type(value = classOf[DataToStateOpDesc], name = "DataToState"), + new Type(value = classOf[StateToDataOpDesc], name = "StateToData"), + new Type(value = classOf[StateReceiverOpDesc], name = "TestingStateReceiver"), + new Type(value = classOf[DualInputStateReceiverOpDesc], name = "DualInputTestingStateReceiver"), new Type(value = classOf[RandomKSamplingOpDesc], name = "RandomKSampling"), new Type(value = classOf[ReservoirSamplingOpDesc], name = "ReservoirSampling"), new Type(value = classOf[HashJoinOpDesc[String]], name = "HashJoin"), diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala new file mode 100644 index 00000000000..c8733f410ef --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala @@ -0,0 +1,46 @@ +package edu.uci.ics.texera.workflow.operators.state + +import edu.uci.ics.amber.engine.architecture.deploysemantics.{PhysicalOp, SchemaPropagationFunc} +import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo +import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} +import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort, PortIdentity} +import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} +import edu.uci.ics.texera.workflow.common.operators.LogicalOp +import edu.uci.ics.texera.workflow.common.tuple.schema.Schema + +class DataToStateOpDesc extends LogicalOp { + override def getPhysicalOp( + workflowId: WorkflowIdentity, + executionId: ExecutionIdentity + ): PhysicalOp = { + PhysicalOp + .oneToOnePhysicalOp( + workflowId, + executionId, + operatorIdentifier, + OpExecInitInfo((_, _) => { + new DataToStateOpExec() + }) + ) + .withInputPorts(operatorInfo.inputPorts) + .withOutputPorts(operatorInfo.outputPorts) + .withPropagateSchema( + SchemaPropagationFunc(inputSchemas => Map(PortIdentity() -> inputSchemas(PortIdentity(1)))) + ) + .withSuggestedWorkerNum(1) + } + + override def operatorInfo: OperatorInfo = + OperatorInfo( + "Data To State", + "Convert Data to State", + OperatorGroupConstants.UTILITY_GROUP, + inputPorts = List( + InputPort(PortIdentity(), "State"), + InputPort(PortIdentity(1), "Data", dependencies = List(PortIdentity())) + ), + outputPorts = List(OutputPort()) + ) + + override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(1) +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala new file mode 100644 index 00000000000..a4423ec99ae --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala @@ -0,0 +1,28 @@ +package edu.uci.ics.texera.workflow.operators.state + +import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike +import edu.uci.ics.texera.workflow.common.State +import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor +import edu.uci.ics.texera.workflow.common.tuple.Tuple + +import scala.collection.mutable + +class DataToStateOpExec extends OperatorExecutor { + private val buffer = new mutable.ArrayBuffer[Tuple]() + private var stateTuple: Tuple = _ + + override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { + port match { + case 0 => + if (stateTuple == null) + stateTuple = tuple + case 1 => + buffer += tuple + } + Iterator() + } + + override def onFinishProduceState(port: Int): Option[State] = Some(State().fromTuple(stateTuple)) + + override def onFinish(port: Int): Iterator[TupleLike] = buffer.iterator +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpDesc.scala new file mode 100644 index 00000000000..dfa5bc434a8 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpDesc.scala @@ -0,0 +1,42 @@ +package edu.uci.ics.texera.workflow.operators.state + +import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp +import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo +import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} +import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort, PortIdentity} +import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} +import edu.uci.ics.texera.workflow.common.operators.LogicalOp +import edu.uci.ics.texera.workflow.common.tuple.schema.Schema + +class DualInputStateReceiverOpDesc extends LogicalOp { + + override def getPhysicalOp( + workflowId: WorkflowIdentity, + executionId: ExecutionIdentity + ): PhysicalOp = { + PhysicalOp + .oneToOnePhysicalOp( + workflowId, + executionId, + operatorIdentifier, + OpExecInitInfo((_, _) => { + new DualInputStateReceiverOpExec() + }) + ) + .withInputPorts(operatorInfo.inputPorts) + .withOutputPorts(operatorInfo.outputPorts) + .withParallelizable(false) + } + + override def operatorInfo: OperatorInfo = + OperatorInfo( + "2 in Testing State Receiver", + "", + OperatorGroupConstants.UTILITY_GROUP, + inputPorts = List(InputPort(PortIdentity(0)), InputPort(PortIdentity(1))), + outputPorts = List(OutputPort()), + supportReconfiguration = true + ) + + override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(0) +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpExec.scala new file mode 100644 index 00000000000..ea6ad205f4b --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpExec.scala @@ -0,0 +1,26 @@ +package edu.uci.ics.texera.workflow.operators.state + +import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike +import edu.uci.ics.amber.engine.common.workflow.PortIdentity +import edu.uci.ics.texera.workflow.common.State +import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor +import edu.uci.ics.texera.workflow.common.tuple.Tuple + +class DualInputStateReceiverOpExec extends OperatorExecutor { + + override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { + Iterator(tuple) + } + + override def processTupleMultiPort( + tuple: Tuple, + port: Int + ): Iterator[(TupleLike, Option[PortIdentity])] = { + processTuple(tuple, port).map(t => (t, None)) + } + + override def processState(state: State, port: Int): State = { + println(port, state) + state + } +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpDesc.scala new file mode 100644 index 00000000000..346084072fc --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpDesc.scala @@ -0,0 +1,42 @@ +package edu.uci.ics.texera.workflow.operators.state + +import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp +import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo +import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} +import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort} +import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} +import edu.uci.ics.texera.workflow.common.operators.LogicalOp +import edu.uci.ics.texera.workflow.common.tuple.schema.Schema + +class StateReceiverOpDesc extends LogicalOp { + + override def getPhysicalOp( + workflowId: WorkflowIdentity, + executionId: ExecutionIdentity + ): PhysicalOp = { + PhysicalOp + .oneToOnePhysicalOp( + workflowId, + executionId, + operatorIdentifier, + OpExecInitInfo((_, _) => { + new StateReceiverOpExec() + }) + ) + .withInputPorts(operatorInfo.inputPorts) + .withOutputPorts(operatorInfo.outputPorts) + .withParallelizable(false) + } + + override def operatorInfo: OperatorInfo = + OperatorInfo( + "Testing State Receiver", + "", + OperatorGroupConstants.UTILITY_GROUP, + inputPorts = List(InputPort()), + outputPorts = List(OutputPort()), + supportReconfiguration = true + ) + + override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(0) +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpExec.scala new file mode 100644 index 00000000000..41af2291e6a --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpExec.scala @@ -0,0 +1,18 @@ +package edu.uci.ics.texera.workflow.operators.state + +import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike +import edu.uci.ics.texera.workflow.common.State +import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor +import edu.uci.ics.texera.workflow.common.tuple.Tuple + +class StateReceiverOpExec extends OperatorExecutor { + + override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { + Iterator(tuple) + } + + override def processState(state: State, port: Int): State = { + println(state) + state + } +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala new file mode 100644 index 00000000000..053dafe92c1 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala @@ -0,0 +1,62 @@ +package edu.uci.ics.texera.workflow.operators.state + +import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription} +import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle +import edu.uci.ics.amber.engine.architecture.deploysemantics.{PhysicalOp, SchemaPropagationFunc} +import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo +import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} +import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort, PortIdentity} +import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} +import edu.uci.ics.texera.workflow.common.operators.LogicalOp +import edu.uci.ics.texera.workflow.common.tuple.schema.{Attribute, Schema} + +class StateToDataOpDesc extends LogicalOp { + @JsonProperty + @JsonSchemaTitle("State output column(s)") + @JsonPropertyDescription( + "Name of the newly added output columns that the UDF will produce, if any" + ) + var outputColumns: List[Attribute] = List() + + override def getPhysicalOp( + workflowId: WorkflowIdentity, + executionId: ExecutionIdentity + ): PhysicalOp = { + PhysicalOp + .oneToOnePhysicalOp( + workflowId, + executionId, + operatorIdentifier, + OpExecInitInfo((_, _) => { + new StateToDataOpExec() + }) + ) + .withInputPorts(operatorInfo.inputPorts) + .withOutputPorts(operatorInfo.outputPorts) + .withPropagateSchema( + SchemaPropagationFunc(inputSchemas => + getOutputSchemas( + operatorInfo.inputPorts.map(port => inputSchemas(port.id)).toArray + ).zipWithIndex.map { + case (schema, index) => PortIdentity(index) -> schema + }.toMap + ) + ) + .withSuggestedWorkerNum(1) + } + + override def operatorInfo: OperatorInfo = + OperatorInfo( + "State To Data", + "Convert State to Data", + OperatorGroupConstants.UTILITY_GROUP, + inputPorts = List(InputPort()), + outputPorts = List(OutputPort(PortIdentity(), "State"), OutputPort(PortIdentity(1), "Data")) + ) + + override def getOutputSchema(schemas: Array[Schema]): Schema = throw new NotImplementedError() + + override def getOutputSchemas(schemas: Array[Schema]): Array[Schema] = + Array(Schema.builder().add(outputColumns).build(), schemas(0)) + +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala new file mode 100644 index 00000000000..692309a9468 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala @@ -0,0 +1,32 @@ +package edu.uci.ics.texera.workflow.operators.state + +import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike +import edu.uci.ics.amber.engine.common.workflow.PortIdentity +import edu.uci.ics.texera.workflow.common.State +import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor +import edu.uci.ics.texera.workflow.common.tuple.Tuple + +class StateToDataOpExec extends OperatorExecutor { + private var stateTuple: Tuple = _ + + override def processState(state: State, port: Int): State = { + if (state.size > 0) + stateTuple = state.toTuple + State() + } + + override def processTupleMultiPort( + tuple: Tuple, + port: Int + ): Iterator[(TupleLike, Option[PortIdentity])] = { + if (stateTuple != null) { + val outputTuple = stateTuple + stateTuple = null + Array((outputTuple, Some(PortIdentity())), (tuple, Some(PortIdentity(1)))).iterator + } else { + Iterator((tuple, Some(PortIdentity(1)))) + } + } + + override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = throw new NotImplementedError() +} From 8ddea2c36b0bfca5d6fd509775ee83612bda16bb Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sun, 8 Sep 2024 22:37:24 -0700 Subject: [PATCH 085/163] update --- .../architecture/packaging/output_manager.py | 8 +++---- .../sendsemantics/broad_cast_partitioner.py | 14 +++++------- .../hash_based_shuffle_partitioner.py | 19 +++++++--------- .../sendsemantics/one_to_one_partitioner.py | 22 +++++-------------- .../architecture/sendsemantics/partitioner.py | 10 +++++---- .../range_based_shuffle_partitioner.py | 14 +++++------- .../sendsemantics/round_robin_partitioner.py | 22 +++++-------------- 7 files changed, 38 insertions(+), 71 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/packaging/output_manager.py b/core/amber/src/main/python/core/architecture/packaging/output_manager.py index cc907b17f03..5f77d3da3e3 100644 --- a/core/amber/src/main/python/core/architecture/packaging/output_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/output_manager.py @@ -108,12 +108,12 @@ def state_to_batch( ( receiver, ( - MarkerFrame(tuples) - if isinstance(tuples, State) - else self.tuple_to_frame(tuples) + MarkerFrame(payload) + if isinstance(payload, State) + else self.tuple_to_frame(payload) ), ) - for receiver, tuples in partitioner.add_state_to_batch(state) + for receiver, payload in partitioner.flush(state) ) for partitioner in self._partitioners.values() ) diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/broad_cast_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/broad_cast_partitioner.py index 03afbdc1e99..9a4939435ad 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/broad_cast_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/broad_cast_partitioner.py @@ -5,7 +5,7 @@ from core.architecture.sendsemantics.partitioner import Partitioner from core.models import Tuple -from core.models.marker import EndOfUpstream +from core.models.marker import EndOfUpstream, Marker from core.util import set_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import ( Partitioning, @@ -34,20 +34,16 @@ def add_tuple_to_batch( self.reset() @overrides - def no_more( - self, - ) -> Iterator[ - typing.Tuple[ - ActorVirtualIdentity, typing.Union[EndOfUpstream, typing.List[Tuple]] - ] - ]: + def flush( + self, marker: Marker + ) -> Iterator[typing.Tuple[ActorVirtualIdentity, typing.Union[Marker, typing.List[Tuple]]]]: if len(self.batch) > 0: for receiver in self.receivers: yield receiver, self.batch self.reset() for receiver in self.receivers: - yield receiver, EndOfUpstream() + yield receiver, marker @overrides def reset(self) -> None: diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py index ffb21bf8e9a..5b6b7777c66 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py @@ -6,7 +6,7 @@ from copy import deepcopy from core.architecture.sendsemantics.partitioner import Partitioner from core.models import Tuple, State -from core.models.marker import EndOfUpstream +from core.models.marker import EndOfUpstream, Marker from core.util import set_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import ( HashBasedShufflePartitioning, @@ -42,15 +42,6 @@ def add_tuple_to_batch( yield receiver, batch self.receivers[hash_code] = (receiver, list()) - @overrides - def add_state_to_batch(self, state: State): - for receiver, batch in self.receivers: - if len(batch) > 0: - yield receiver, deepcopy(batch) - yield receiver, deepcopy(batch) - batch.clear() - yield receiver, state - @overrides def no_more( self, @@ -59,7 +50,13 @@ def no_more( ActorVirtualIdentity, typing.Union[EndOfUpstream, typing.List[Tuple]] ] ]: + return self.flush(EndOfUpstream()) + + @overrides + def flush( + self, marker: Marker + ) -> Iterator[typing.Tuple[ActorVirtualIdentity, typing.Union[Marker, typing.List[Tuple]]]]: for receiver, batch in self.receivers: if len(batch) > 0: yield receiver, batch - yield receiver, EndOfUpstream() + yield receiver, marker \ No newline at end of file diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py index c3e405e6d88..cd7e452609e 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py @@ -6,7 +6,7 @@ from copy import deepcopy from core.architecture.sendsemantics.partitioner import Partitioner from core.models import Tuple, State -from core.models.marker import EndOfUpstream +from core.models.marker import EndOfUpstream, Marker from core.util import set_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import ( OneToOnePartitioning, @@ -35,25 +35,13 @@ def add_tuple_to_batch( self.reset() @overrides - def add_state_to_batch(self, state: State): - if len(self.batch) > 0: - yield self.receiver, deepcopy(self.batch) - self.reset() - - yield self.receiver, state - - @overrides - def no_more( - self, - ) -> Iterator[ - typing.Tuple[ - ActorVirtualIdentity, typing.Union[EndOfUpstream, typing.List[Tuple]] - ] - ]: + def flush( + self, marker: Marker + ) -> Iterator[typing.Tuple[ActorVirtualIdentity, typing.Union[Marker, typing.List[Tuple]]]]: if len(self.batch) > 0: yield self.receiver, self.batch self.reset() - yield self.receiver, EndOfUpstream() + yield self.receiver, marker @overrides def reset(self) -> None: diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py index e3918c58b98..2c8925c91b1 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py @@ -5,7 +5,7 @@ from betterproto import Message from core.models import Tuple, State -from core.models.marker import EndOfUpstream +from core.models.marker import EndOfUpstream, Marker from core.util import get_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import Partitioning from proto.edu.uci.ics.amber.engine.common import ActorVirtualIdentity @@ -19,9 +19,6 @@ def add_tuple_to_batch( self, tuple_: Tuple ) -> Iterator[typing.Tuple[ActorVirtualIdentity, typing.List[Tuple]]]: pass - - def add_state_to_batch(self, state: State): - pass def no_more( self, @@ -30,6 +27,11 @@ def no_more( ActorVirtualIdentity, typing.Union[EndOfUpstream, typing.List[Tuple]] ] ]: + return self.flush(EndOfUpstream()) + + def flush( + self, marker: Marker + ) -> Iterator[typing.Tuple[ActorVirtualIdentity, typing.Union[Marker, typing.List[Tuple]]]]: pass def reset(self) -> None: diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/range_based_shuffle_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/range_based_shuffle_partitioner.py index 66879868b12..8f6b882a3a1 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/range_based_shuffle_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/range_based_shuffle_partitioner.py @@ -6,7 +6,7 @@ from core.architecture.sendsemantics.partitioner import Partitioner from core.models import Tuple -from core.models.marker import EndOfUpstream +from core.models.marker import EndOfUpstream, Marker from core.util import set_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import ( RangeBasedShufflePartitioning, @@ -56,14 +56,10 @@ def add_tuple_to_batch( self.receivers[receiver_index] = (receiver, list()) @overrides - def no_more( - self, - ) -> Iterator[ - typing.Tuple[ - ActorVirtualIdentity, typing.Union[EndOfUpstream, typing.List[Tuple]] - ] - ]: + def flush( + self, marker: Marker + ) -> Iterator[typing.Tuple[ActorVirtualIdentity, typing.Union[Marker, typing.List[Tuple]]]]: for receiver, batch in self.receivers: if len(batch) > 0: yield receiver, batch - yield receiver, EndOfUpstream() + yield receiver, marker \ No newline at end of file diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py index 070f67e6fb1..01cbbb9a1da 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py @@ -6,7 +6,7 @@ from copy import deepcopy from core.architecture.sendsemantics.partitioner import Partitioner from core.models import Tuple, State -from core.models.marker import EndOfUpstream +from core.models.marker import EndOfUpstream, Marker from core.util import set_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import ( Partitioning, @@ -37,22 +37,10 @@ def add_tuple_to_batch( self.round_robin_index = (self.round_robin_index + 1) % len(self.receivers) @overrides - def add_state_to_batch(self, state: State): - for receiver, batch in self.receivers: - if len(batch) > 0: - yield receiver, deepcopy(batch) - batch.clear() - yield receiver, state - - @overrides - def no_more( - self, - ) -> Iterator[ - typing.Tuple[ - ActorVirtualIdentity, typing.Union[EndOfUpstream, typing.List[Tuple]] - ] - ]: + def flush( + self, marker: Marker + ) -> Iterator[typing.Tuple[ActorVirtualIdentity, typing.Union[Marker, typing.List[Tuple]]]]: for receiver, batch in self.receivers: if len(batch) > 0: yield receiver, batch - yield receiver, EndOfUpstream() + yield receiver, marker From ce832ff0f7c584238d906c8a371825c09c4b9984 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 9 Sep 2024 02:54:20 -0700 Subject: [PATCH 086/163] update --- .../src/main/python/core/models/marker.py | 4 + .../python/core/runnables/network_receiver.py | 4 +- .../architecture/worker/DataProcessor.scala | 41 +++++++++- .../worker/promisehandlers/StartHandler.scala | 6 +- .../ics/texera/workflow/common/Marker.scala | 1 + .../workflow/common/operators/LogicalOp.scala | 77 +++++++++++++++++-- .../common/operators/OperatorExecutor.scala | 2 + .../operators/state/ProduceStateOpDesc.scala | 42 ++++++++++ .../operators/state/ProduceStateOpExec.scala | 21 +++++ 9 files changed, 184 insertions(+), 14 deletions(-) create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpDesc.scala create mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpExec.scala diff --git a/core/amber/src/main/python/core/models/marker.py b/core/amber/src/main/python/core/models/marker.py index b241e5c31e7..7f2a5c93a01 100644 --- a/core/amber/src/main/python/core/models/marker.py +++ b/core/amber/src/main/python/core/models/marker.py @@ -6,6 +6,10 @@ class Marker: pass +@dataclass +class StartOfUpstream(Marker): + pass + @dataclass class EndOfUpstream(Marker): pass diff --git a/core/amber/src/main/python/core/runnables/network_receiver.py b/core/amber/src/main/python/core/runnables/network_receiver.py index 43a16583e3b..9efe8cb53aa 100644 --- a/core/amber/src/main/python/core/runnables/network_receiver.py +++ b/core/amber/src/main/python/core/runnables/network_receiver.py @@ -17,7 +17,7 @@ MarkerFrame, ) from core.models.internal_queue import DataElement, ControlElement, InternalQueue -from core.models.marker import EndOfUpstream, State +from core.models.marker import EndOfUpstream, State, StartOfUpstream from core.proxy import ProxyServer from core.util import Stoppable, get_one_of from core.util.runnable.runnable import Runnable @@ -68,6 +68,8 @@ def data_handler(command: bytes, table: Table) -> int: payload = DataFrame(table) elif payload_type == "State": payload = MarkerFrame(State().from_table(table)) + elif payload_type == "StartOfUpstream": + payload = MarkerFrame(StartOfUpstream()) elif payload_type == "EndOfUpstream": payload = MarkerFrame(EndOfUpstream()) shared_queue.put( diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index a2fef62d36f..d7885da7d6d 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -7,20 +7,33 @@ import edu.uci.ics.amber.engine.architecture.controller.promisehandlers.PortComp import edu.uci.ics.amber.engine.architecture.controller.promisehandlers.WorkerExecutionCompletedHandler.WorkerExecutionCompleted import edu.uci.ics.amber.engine.architecture.controller.promisehandlers.WorkerStateUpdatedHandler.WorkerStateUpdated import edu.uci.ics.amber.engine.architecture.logreplay.ReplayLogManager -import edu.uci.ics.amber.engine.architecture.messaginglayer.{InputManager, OutputManager, WorkerTimerService} +import edu.uci.ics.amber.engine.architecture.messaginglayer.{ + InputManager, + OutputManager, + WorkerTimerService +} import edu.uci.ics.amber.engine.architecture.worker.WorkflowWorker.MainThreadDelegateMessage import edu.uci.ics.amber.engine.architecture.worker.managers.SerializationManager import edu.uci.ics.amber.engine.architecture.worker.promisehandlers.PauseHandler.PauseWorker -import edu.uci.ics.amber.engine.architecture.worker.statistics.WorkerState.{COMPLETED, READY, RUNNING} +import edu.uci.ics.amber.engine.architecture.worker.statistics.WorkerState.{ + COMPLETED, + READY, + RUNNING +} import edu.uci.ics.amber.engine.architecture.worker.statistics.WorkerStatistics import edu.uci.ics.amber.engine.common.ambermessage._ import edu.uci.ics.amber.engine.common.statetransition.WorkerStateManager -import edu.uci.ics.amber.engine.common.tuple.amber.{FinalizeExecutor, FinalizePort, SchemaEnforceable, TupleLike} +import edu.uci.ics.amber.engine.common.tuple.amber.{ + FinalizeExecutor, + FinalizePort, + SchemaEnforceable, + TupleLike +} import edu.uci.ics.amber.engine.common.virtualidentity.util.{CONTROLLER, SELF} import edu.uci.ics.amber.engine.common.virtualidentity.{ActorVirtualIdentity, ChannelIdentity} import edu.uci.ics.amber.engine.common.workflow.PortIdentity import edu.uci.ics.amber.error.ErrorUtils.{mkConsoleMessage, safely} -import edu.uci.ics.texera.workflow.common.{EndOfUpstream, State} +import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfUpstream, State} import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple @@ -84,6 +97,24 @@ class DataProcessor( outputManager.emitMarker(executor.processState(state, port)) } + /** + * process start of an input port with Executor.onStart(). + * this function is only called by the DP thread. + */ + private[this] def processStartOfUpstream(portId: Int): Unit = { + try { + outputManager.emitMarker(StartOfUpstream()) + val outputState = executor.onStartProduceState(portId) + if (outputState.isDefined) { + outputManager.emitMarker(outputState.get) + } + } catch safely { + case e => + // forward input tuple to the user and pause DP thread + handleExecutorException(e) + } + } + /** * process end of an input port with Executor.onFinish(). * this function is only called by the DP thread. @@ -189,6 +220,8 @@ class DataProcessor( marker match { case state: State => processInputState(state, portId.id) + case StartOfUpstream() => + processStartOfUpstream(portId.id) case EndOfUpstream() => this.inputManager.getPort(portId).channels(channelId) = true if (inputManager.isPortCompleted(portId)) { diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala index b4cd89ec2b0..fe085b065be 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala @@ -11,7 +11,7 @@ import edu.uci.ics.amber.engine.common.rpc.AsyncRPCServer.ControlCommand import edu.uci.ics.amber.engine.common.virtualidentity.ChannelIdentity import edu.uci.ics.amber.engine.common.virtualidentity.util.SOURCE_STARTER_ACTOR import edu.uci.ics.amber.engine.common.workflow.PortIdentity -import edu.uci.ics.texera.workflow.common.{EndOfUpstream, State} +import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfUpstream, State} object StartHandler { final case class StartWorker() extends ControlCommand[WorkerState] @@ -31,6 +31,10 @@ trait StartHandler { dp.inputGateway .getChannel(ChannelIdentity(SOURCE_STARTER_ACTOR, actorId, isControl = false)) .setPortId(dummyInputPortId) + dp.processDataPayload( + ChannelIdentity(SOURCE_STARTER_ACTOR, dp.actorId, isControl = false), + MarkerFrame(StartOfUpstream()) + ) dp.processDataPayload( ChannelIdentity(SOURCE_STARTER_ACTOR, dp.actorId, isControl = false), MarkerFrame(EndOfUpstream()) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index dd26ae69a32..e2d99e27cb0 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -6,6 +6,7 @@ import scala.collection.mutable sealed trait Marker +final case class StartOfUpstream() extends Marker final case class EndOfUpstream() extends Marker final case class State() extends Marker { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala index 60f5210feb5..e428d287783 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala @@ -1,10 +1,20 @@ package edu.uci.ics.texera.workflow.common.operators import com.fasterxml.jackson.annotation.JsonSubTypes.Type -import com.fasterxml.jackson.annotation.{JsonIgnore, JsonProperty, JsonPropertyDescription, JsonSubTypes, JsonTypeInfo} +import com.fasterxml.jackson.annotation.{ + JsonIgnore, + JsonProperty, + JsonPropertyDescription, + JsonSubTypes, + JsonTypeInfo +} import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp -import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, OperatorIdentity, WorkflowIdentity} +import edu.uci.ics.amber.engine.common.virtualidentity.{ + ExecutionIdentity, + OperatorIdentity, + WorkflowIdentity +} import edu.uci.ics.amber.engine.common.workflow.PortIdentity import edu.uci.ics.texera.web.OPversion import edu.uci.ics.texera.workflow.common.metadata.{OperatorInfo, PropertyNameConstants} @@ -24,8 +34,16 @@ import edu.uci.ics.texera.workflow.operators.intersect.IntersectOpDesc import edu.uci.ics.texera.workflow.operators.intervalJoin.IntervalJoinOpDesc import edu.uci.ics.texera.workflow.operators.keywordSearch.KeywordSearchOpDesc import edu.uci.ics.texera.workflow.operators.limit.LimitOpDesc -import edu.uci.ics.texera.workflow.operators.huggingFace.{HuggingFaceIrisLogisticRegressionOpDesc, HuggingFaceSentimentAnalysisOpDesc, HuggingFaceSpamSMSDetectionOpDesc, HuggingFaceTextSummarizationOpDesc} -import edu.uci.ics.texera.workflow.operators.machineLearning.sklearnAdvanced.KNNTrainer.{SklearnAdvancedKNNClassifierTrainerOpDesc, SklearnAdvancedKNNRegressorTrainerOpDesc} +import edu.uci.ics.texera.workflow.operators.huggingFace.{ + HuggingFaceIrisLogisticRegressionOpDesc, + HuggingFaceSentimentAnalysisOpDesc, + HuggingFaceSpamSMSDetectionOpDesc, + HuggingFaceTextSummarizationOpDesc +} +import edu.uci.ics.texera.workflow.operators.machineLearning.sklearnAdvanced.KNNTrainer.{ + SklearnAdvancedKNNClassifierTrainerOpDesc, + SklearnAdvancedKNNRegressorTrainerOpDesc +} import edu.uci.ics.texera.workflow.operators.machineLearning.sklearnAdvanced.SVCTrainer.SklearnAdvancedSVCTrainerOpDesc import edu.uci.ics.texera.workflow.operators.machineLearning.sklearnAdvanced.SVRTrainer.SVCTrainer.SklearnAdvancedSVRTrainerOpDesc import edu.uci.ics.texera.workflow.operators.projection.ProjectionOpDesc @@ -34,11 +52,42 @@ import edu.uci.ics.texera.workflow.operators.regex.RegexOpDesc import edu.uci.ics.texera.workflow.operators.reservoirsampling.ReservoirSamplingOpDesc import edu.uci.ics.texera.workflow.operators.sentiment.SentimentAnalysisOpDesc import edu.uci.ics.texera.workflow.operators.sink.managed.ProgressiveSinkOpDesc -import edu.uci.ics.texera.workflow.operators.sklearn.{SklearnAdaptiveBoostingOpDesc, SklearnBaggingOpDesc, SklearnBernoulliNaiveBayesOpDesc, SklearnComplementNaiveBayesOpDesc, SklearnDecisionTreeOpDesc, SklearnDummyClassifierOpDesc, SklearnExtraTreeOpDesc, SklearnExtraTreesOpDesc, SklearnGaussianNaiveBayesOpDesc, SklearnGradientBoostingOpDesc, SklearnKNNOpDesc, SklearnLinearRegressionOpDesc, SklearnLinearSVMOpDesc, SklearnLogisticRegressionCVOpDesc, SklearnLogisticRegressionOpDesc, SklearnMultiLayerPerceptronOpDesc, SklearnMultinomialNaiveBayesOpDesc, SklearnNearestCentroidOpDesc, SklearnPassiveAggressiveOpDesc, SklearnPerceptronOpDesc, SklearnPredictionOpDesc, SklearnProbabilityCalibrationOpDesc, SklearnRandomForestOpDesc, SklearnRidgeCVOpDesc, SklearnRidgeOpDesc, SklearnSDGOpDesc, SklearnSVMOpDesc} +import edu.uci.ics.texera.workflow.operators.sklearn.{ + SklearnAdaptiveBoostingOpDesc, + SklearnBaggingOpDesc, + SklearnBernoulliNaiveBayesOpDesc, + SklearnComplementNaiveBayesOpDesc, + SklearnDecisionTreeOpDesc, + SklearnDummyClassifierOpDesc, + SklearnExtraTreeOpDesc, + SklearnExtraTreesOpDesc, + SklearnGaussianNaiveBayesOpDesc, + SklearnGradientBoostingOpDesc, + SklearnKNNOpDesc, + SklearnLinearRegressionOpDesc, + SklearnLinearSVMOpDesc, + SklearnLogisticRegressionCVOpDesc, + SklearnLogisticRegressionOpDesc, + SklearnMultiLayerPerceptronOpDesc, + SklearnMultinomialNaiveBayesOpDesc, + SklearnNearestCentroidOpDesc, + SklearnPassiveAggressiveOpDesc, + SklearnPerceptronOpDesc, + SklearnPredictionOpDesc, + SklearnProbabilityCalibrationOpDesc, + SklearnRandomForestOpDesc, + SklearnRidgeCVOpDesc, + SklearnRidgeOpDesc, + SklearnSDGOpDesc, + SklearnSVMOpDesc +} import edu.uci.ics.texera.workflow.operators.sort.SortOpDesc import edu.uci.ics.texera.workflow.operators.sortPartitions.SortPartitionsOpDesc import edu.uci.ics.texera.workflow.operators.source.apis.reddit.RedditSearchSourceOpDesc -import edu.uci.ics.texera.workflow.operators.source.apis.twitter.v2.{TwitterFullArchiveSearchSourceOpDesc, TwitterSearchSourceOpDesc} +import edu.uci.ics.texera.workflow.operators.source.apis.twitter.v2.{ + TwitterFullArchiveSearchSourceOpDesc, + TwitterSearchSourceOpDesc +} import edu.uci.ics.texera.workflow.operators.source.fetcher.URLFetcherOpDesc import edu.uci.ics.texera.workflow.operators.source.scan.FileScanSourceOpDesc import edu.uci.ics.texera.workflow.operators.source.scan.csv.CSVScanSourceOpDesc @@ -53,7 +102,12 @@ import edu.uci.ics.texera.workflow.operators.symmetricDifference.SymmetricDiffer import edu.uci.ics.texera.workflow.operators.typecasting.TypeCastingOpDesc import edu.uci.ics.texera.workflow.operators.udf.java.JavaUDFOpDesc import edu.uci.ics.texera.workflow.operators.udf.python.source.PythonUDFSourceOpDescV2 -import edu.uci.ics.texera.workflow.operators.udf.python.{DualInputPortsPythonUDFOpDescV2, PythonLambdaFunctionOpDesc, PythonTableReducerOpDesc, PythonUDFOpDescV2} +import edu.uci.ics.texera.workflow.operators.udf.python.{ + DualInputPortsPythonUDFOpDescV2, + PythonLambdaFunctionOpDesc, + PythonTableReducerOpDesc, + PythonUDFOpDescV2 +} import edu.uci.ics.texera.workflow.operators.udf.r.{RUDFOpDesc, RUDFSourceOpDesc} import edu.uci.ics.texera.workflow.operators.union.UnionOpDesc import edu.uci.ics.texera.workflow.operators.unneststring.UnnestStringOpDesc @@ -85,7 +139,13 @@ import edu.uci.ics.texera.workflow.operators.visualization.ternaryPlot.TernaryPl import org.apache.commons.lang3.builder.{EqualsBuilder, HashCodeBuilder, ToStringBuilder} import org.apache.zookeeper.KeeperException.UnimplementedException import edu.uci.ics.texera.workflow.operators.machineLearning.Scorer.MachineLearningScorerOpDesc -import edu.uci.ics.texera.workflow.operators.state.{DataToStateOpDesc, DualInputStateReceiverOpDesc, StateReceiverOpDesc, StateToDataOpDesc} +import edu.uci.ics.texera.workflow.operators.state.{ + DataToStateOpDesc, + DualInputStateReceiverOpDesc, + ProduceStateOpDesc, + StateReceiverOpDesc, + StateToDataOpDesc +} import edu.uci.ics.texera.workflow.operators.visualization.quiverPlot.QuiverPlotOpDesc import edu.uci.ics.texera.workflow.operators.visualization.contourPlot.ContourPlotOpDesc import edu.uci.ics.texera.workflow.operators.visualization.figureFactoryTable.FigureFactoryTableOpDesc @@ -151,6 +211,7 @@ trait StateTransferFunc new Type(value = classOf[StateToDataOpDesc], name = "StateToData"), new Type(value = classOf[StateReceiverOpDesc], name = "TestingStateReceiver"), new Type(value = classOf[DualInputStateReceiverOpDesc], name = "DualInputTestingStateReceiver"), + new Type(value = classOf[ProduceStateOpDesc], name = "ProduceStateOpDesc"), new Type(value = classOf[RandomKSamplingOpDesc], name = "RandomKSampling"), new Type(value = classOf[ReservoirSamplingOpDesc], name = "ReservoirSampling"), new Type(value = classOf[HashJoinOpDesc[String]], name = "HashJoin"), diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala index 2c79feb9ecf..9c80fac4e17 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala @@ -9,6 +9,8 @@ trait OperatorExecutor { def open(): Unit = {} + def onStartProduceState(port: Int): Option[State] = None + def processState(state: State, port: Int): State = state def processTupleMultiPort( diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpDesc.scala new file mode 100644 index 00000000000..d282497524b --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpDesc.scala @@ -0,0 +1,42 @@ +package edu.uci.ics.texera.workflow.operators.state + +import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp +import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo +import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} +import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort} +import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} +import edu.uci.ics.texera.workflow.common.operators.LogicalOp +import edu.uci.ics.texera.workflow.common.tuple.schema.Schema + +class ProduceStateOpDesc extends LogicalOp { + + override def getPhysicalOp( + workflowId: WorkflowIdentity, + executionId: ExecutionIdentity + ): PhysicalOp = { + PhysicalOp + .oneToOnePhysicalOp( + workflowId, + executionId, + operatorIdentifier, + OpExecInitInfo((_, _) => { + new ProduceStateOpExec() + }) + ) + .withInputPorts(operatorInfo.inputPorts) + .withOutputPorts(operatorInfo.outputPorts) + .withParallelizable(false) + } + + override def operatorInfo: OperatorInfo = + OperatorInfo( + "Produce State", + "", + OperatorGroupConstants.UTILITY_GROUP, + inputPorts = List(InputPort()), + outputPorts = List(OutputPort()), + supportReconfiguration = true + ) + + override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(0) +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpExec.scala new file mode 100644 index 00000000000..1dbc0917b77 --- /dev/null +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpExec.scala @@ -0,0 +1,21 @@ +package edu.uci.ics.texera.workflow.operators.state + +import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike +import edu.uci.ics.texera.workflow.common.State +import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor +import edu.uci.ics.texera.workflow.common.tuple.Tuple +import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType + +class ProduceStateOpExec extends OperatorExecutor { + + override def onStartProduceState(port: Int): Option[State] = { + val state = State() + state.add("i", AttributeType.INTEGER, 1) + Some(state) + } + + override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { + Iterator(tuple) + } + +} From ffe9d2611523f0c902fe236b7aa7811ec2db2bba Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 9 Sep 2024 03:41:52 -0700 Subject: [PATCH 087/163] update --- .../architecture/packaging/input_manager.py | 6 ++-- .../architecture/packaging/output_manager.py | 35 ++++--------------- .../architecture/sendsemantics/partitioner.py | 9 ----- .../python/core/models/internal_marker.py | 3 ++ .../main/python/core/runnables/main_loop.py | 19 ++++++++-- 5 files changed, 30 insertions(+), 42 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index 1aced125f3c..0aa02be275d 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -1,8 +1,8 @@ from typing import Iterator, Optional, Union, Dict, List from core.models import Tuple, ArrowTableTupleProvider, Schema, InputExhausted -from core.models.internal_marker import EndOfAll, InternalMarker, SenderChange -from core.models.marker import EndOfUpstream, State +from core.models.internal_marker import EndOfAll, InternalMarker, SenderChange, StartOfAll +from core.models.marker import EndOfUpstream, State, StartOfUpstream from core.models.payload import DataFrame, DataPayload, MarkerFrame from proto.edu.uci.ics.amber.engine.common import ( ActorVirtualIdentity, @@ -110,6 +110,8 @@ def process_data_payload( elif isinstance(payload, MarkerFrame): if isinstance(payload.frame, State): yield payload.frame + if isinstance(payload.frame, StartOfUpstream): + yield StartOfAll() if isinstance(payload.frame, EndOfUpstream): channel = self._channels[self._current_channel_id] channel.complete() diff --git a/core/amber/src/main/python/core/architecture/packaging/output_manager.py b/core/amber/src/main/python/core/architecture/packaging/output_manager.py index 5f77d3da3e3..f60e44e75fe 100644 --- a/core/amber/src/main/python/core/architecture/packaging/output_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/output_manager.py @@ -22,7 +22,7 @@ BroadcastPartitioner, ) from core.models import Tuple, Schema, MarkerFrame, State -from core.models.marker import EndOfUpstream +from core.models.marker import EndOfUpstream, Marker from core.models.payload import DataPayload, DataFrame from core.util import get_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import ( @@ -99,9 +99,9 @@ def tuple_to_batch( ) ) - def state_to_batch( - self, state: State - ) -> Iterator[typing.Tuple[ActorVirtualIdentity, DataPayload]]: + def emit_marker( + self, marker: Marker + ) -> Iterable[typing.Tuple[ActorVirtualIdentity, DataPayload]]: return chain( *( ( @@ -109,17 +109,16 @@ def state_to_batch( receiver, ( MarkerFrame(payload) - if isinstance(payload, State) + if isinstance(payload, Marker) else self.tuple_to_frame(payload) ), ) - for receiver, payload in partitioner.flush(state) + for receiver, payload in partitioner.flush(marker) ) for partitioner in self._partitioners.values() ) ) - def tuple_to_frame(self, tuples: typing.List[Tuple]) -> DataFrame: return DataFrame( frame=Table.from_pydict( @@ -129,24 +128,4 @@ def tuple_to_frame(self, tuples: typing.List[Tuple]) -> DataFrame: }, schema=self.get_port().get_schema().as_arrow_schema(), ) - ) - - def emit_end_of_upstream( - self, - ) -> Iterable[typing.Tuple[ActorVirtualIdentity, DataPayload]]: - return chain( - *( - ( - ( - receiver, - ( - MarkerFrame(tuples) - if isinstance(tuples, EndOfUpstream) - else self.tuple_to_frame(tuples) - ), - ) - for receiver, tuples in partitioner.no_more() - ) - for partitioner in self._partitioners.values() - ) - ) + ) \ No newline at end of file diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py index 2c8925c91b1..d32f4f71d39 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py @@ -19,15 +19,6 @@ def add_tuple_to_batch( self, tuple_: Tuple ) -> Iterator[typing.Tuple[ActorVirtualIdentity, typing.List[Tuple]]]: pass - - def no_more( - self, - ) -> Iterator[ - typing.Tuple[ - ActorVirtualIdentity, typing.Union[EndOfUpstream, typing.List[Tuple]] - ] - ]: - return self.flush(EndOfUpstream()) def flush( self, marker: Marker diff --git a/core/amber/src/main/python/core/models/internal_marker.py b/core/amber/src/main/python/core/models/internal_marker.py index e10f7ce536e..ba37b6e212b 100644 --- a/core/amber/src/main/python/core/models/internal_marker.py +++ b/core/amber/src/main/python/core/models/internal_marker.py @@ -18,6 +18,9 @@ class InternalMarker: class SenderChange(InternalMarker): channel_id: ChannelIdentity +@dataclass +class StartOfAll(InternalMarker): + pass @dataclass class EndOfAll(InternalMarker): diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index 0985619594a..7764ad9ca04 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -18,8 +18,9 @@ SenderChange, Tuple, ) +from core.models.internal_marker import StartOfAll from core.models.internal_queue import DataElement, ControlElement -from core.models.marker import State +from core.models.marker import State, EndOfUpstream from core.runnables.data_processor import DataProcessor from core.util import StoppableQueueBlockingRunnable, get_one_of, set_one_of from core.util.customized_queue.queue_base import QueueElement @@ -171,7 +172,7 @@ def process_input_tuple(self) -> None: for (to, batch) in self.context.output_manager.tuple_to_batch(output_data): self._output_queue.put(DataElement(tag=to, payload=batch)) elif isinstance(output_data, State): - for (to, batch) in self.context.output_manager.state_to_batch(output_data): + for (to, batch) in self.context.output_manager.emit_marker(output_data): self._output_queue.put(DataElement(tag=to, payload=batch)) def process_tuple_with_udf(self) -> Iterator[Optional[Tuple]]: @@ -235,6 +236,16 @@ def _process_sender_change_marker(self, sender_change_marker: SenderChange) -> N self.context.input_manager.get_port_id(sender_change_marker.channel_id) ) + + def _process_start_of_all_marker(self, _: StartOfAll) -> None: + """ + Upon receipt of an StartOfAllMarker, which indicates the start of all input links, + send the StartOfUpstream to all downstream workers. + + :param _: StartOfAll Internal Marker + """ + + def _process_end_of_all_marker(self, _: EndOfAll) -> None: """ Upon receipt of an EndOfAllMarker, which indicates the end of all input links, @@ -244,7 +255,7 @@ def _process_end_of_all_marker(self, _: EndOfAll) -> None: :param _: EndOfAllMarker """ - for to, batch in self.context.output_manager.emit_end_of_upstream(): + for to, batch in self.context.output_manager.emit_marker(EndOfUpstream()): self._output_queue.put(DataElement(tag=to, payload=batch)) self._check_and_process_control() control_command = set_one_of( @@ -293,6 +304,8 @@ def _process_data_element(self, data_element: DataElement) -> None: self._process_input_exhausted, SenderChange, self._process_sender_change_marker, + StartOfAll, + self._process_start_of_all_marker, EndOfAll, self._process_end_of_all_marker, State, From a60f5622435017f73759f65dea0502bb541131f3 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 9 Sep 2024 03:42:47 -0700 Subject: [PATCH 088/163] update --- .../sendsemantics/hash_based_shuffle_partitioner.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py index 5b6b7777c66..5678e0cfe5a 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py @@ -42,16 +42,6 @@ def add_tuple_to_batch( yield receiver, batch self.receivers[hash_code] = (receiver, list()) - @overrides - def no_more( - self, - ) -> Iterator[ - typing.Tuple[ - ActorVirtualIdentity, typing.Union[EndOfUpstream, typing.List[Tuple]] - ] - ]: - return self.flush(EndOfUpstream()) - @overrides def flush( self, marker: Marker From 27a40fc6c8ff8685c22d4152bd73f4a3f6d81118 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 9 Sep 2024 03:51:23 -0700 Subject: [PATCH 089/163] update --- core/amber/src/main/python/core/runnables/main_loop.py | 5 ++++- .../architecture/pythonworker/PythonProxyServer.scala | 7 +++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index 7764ad9ca04..58d47662125 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -20,7 +20,7 @@ ) from core.models.internal_marker import StartOfAll from core.models.internal_queue import DataElement, ControlElement -from core.models.marker import State, EndOfUpstream +from core.models.marker import State, EndOfUpstream, StartOfUpstream from core.runnables.data_processor import DataProcessor from core.util import StoppableQueueBlockingRunnable, get_one_of, set_one_of from core.util.customized_queue.queue_base import QueueElement @@ -244,6 +244,9 @@ def _process_start_of_all_marker(self, _: StartOfAll) -> None: :param _: StartOfAll Internal Marker """ + for to, batch in self.context.output_manager.emit_marker(StartOfUpstream()): + self._output_queue.put(DataElement(tag=to, payload=batch)) + self._check_and_process_control() def _process_end_of_all_marker(self, _: EndOfAll) -> None: diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala index 2b5eb3863ab..ad7574518d0 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala @@ -20,7 +20,7 @@ import java.net.ServerSocket import java.util.concurrent.atomic.AtomicInteger import scala.collection.mutable import com.twitter.util.Promise -import edu.uci.ics.texera.workflow.common.{EndOfUpstream, State} +import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfUpstream, State} import java.nio.charset.Charset @@ -103,7 +103,10 @@ private class AmberProducer( // closing the stream will release the dictionaries flightStream.takeDictionaryOwnership - if (dataHeader.payloadType == EndOfUpstream().getClass.getSimpleName) { + if (dataHeader.payloadType == StartOfUpstream().getClass.getSimpleName) { + assert(root.getRowCount == 0) + outputPort.sendTo(to, MarkerFrame(StartOfUpstream())) + } else if (dataHeader.payloadType == EndOfUpstream().getClass.getSimpleName) { assert(root.getRowCount == 0) outputPort.sendTo(to, MarkerFrame(EndOfUpstream())) } else if (dataHeader.payloadType == State().getClass.getSimpleName) { From a88edd1bb244de1826322b9bef8c1cf5ab35763e Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Tue, 10 Sep 2024 00:25:06 -0700 Subject: [PATCH 090/163] update --- .../architecture/worker/promisehandlers/StartHandler.scala | 4 ---- 1 file changed, 4 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala index fe085b065be..cf40d028a13 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala @@ -39,10 +39,6 @@ trait StartHandler { ChannelIdentity(SOURCE_STARTER_ACTOR, dp.actorId, isControl = false), MarkerFrame(EndOfUpstream()) ) - dp.processDataPayload( - ChannelIdentity(SOURCE_STARTER_ACTOR, dp.actorId, isControl = false), - MarkerFrame(State()) - ) dp.stateManager.getCurrentState } else { throw new WorkflowRuntimeException( From 79a4e8f7ff4009c496c343de30850cc8bc0d316e Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 11 Sep 2024 20:52:09 -0700 Subject: [PATCH 091/163] update --- .../main/python/core/architecture/packaging/input_manager.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index 0aa02be275d..8dc9442767a 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -105,8 +105,6 @@ def process_data_payload( ].get_schema(), ) - - elif isinstance(payload, MarkerFrame): if isinstance(payload.frame, State): yield payload.frame From 4c422f2c012c2447fec080881b859a528555e6dd Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 11 Sep 2024 20:52:21 -0700 Subject: [PATCH 092/163] update --- .../scala/edu/uci/ics/texera/workflow/common/Marker.scala | 6 +++--- .../workflow/operators/state/ProduceStateOpExec.scala | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index e2d99e27cb0..1889f2eec3c 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -12,8 +12,8 @@ final case class EndOfUpstream() extends Marker final case class State() extends Marker { val list: mutable.Map[String, (AttributeType, Any)] = mutable.HashMap() - def add(attributeName: String, attributeType: AttributeType, field: Any): Unit = { - list.put(attributeName, (attributeType, field)) + def add(key: String, value: Any, valueType: AttributeType): Unit = { + list.put(key, (valueType, value)) } def get(key: String): Any = list(key)._2 @@ -36,7 +36,7 @@ final case class State() extends Marker { def fromTuple(tuple: Tuple): State = { tuple.getSchema.getAttributes.foreach { attribute => - add(attribute.getName, attribute.getType, tuple.getField(attribute.getName)) + add(attribute.getName, tuple.getField(attribute.getName), attribute.getType) } this } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpExec.scala index 1dbc0917b77..d86c993619e 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpExec.scala @@ -10,7 +10,7 @@ class ProduceStateOpExec extends OperatorExecutor { override def onStartProduceState(port: Int): Option[State] = { val state = State() - state.add("i", AttributeType.INTEGER, 1) + state.add("i", 1, AttributeType.INTEGER) Some(state) } From 00c1504e3240d6239c5fa8a19e42d60fbb0ed229 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 11 Sep 2024 21:20:28 -0700 Subject: [PATCH 093/163] update --- .../src/main/python/core/models/marker.py | 35 +++++++++++-------- .../core/models/schema/attribute_type.py | 10 ++++++ .../python/core/runnables/network_receiver.py | 2 +- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/core/amber/src/main/python/core/models/marker.py b/core/amber/src/main/python/core/models/marker.py index 7f2a5c93a01..0fd1130e4d1 100644 --- a/core/amber/src/main/python/core/models/marker.py +++ b/core/amber/src/main/python/core/models/marker.py @@ -1,6 +1,10 @@ from dataclasses import dataclass from pyarrow import Table from pandas import DataFrame +from typing import Optional +from .schema import Schema, AttributeType +from .schema.attribute_type import FROM_PYOBJECT_MAPPING + @dataclass class Marker: @@ -16,28 +20,29 @@ class EndOfUpstream(Marker): @dataclass class State(Marker): - def __init__(self): - self.data = {} - - def add(self, key: str, value: any) -> None: + def __init__(self, table: Optional[Table] = None): + if table is None: + self.data = {} + self.schema = Schema() + else: + self.data = table.to_pandas().iloc[0].to_dict() + self.schema = table.schema + + def add(self, key: str, value: any, value_type: Optional[AttributeType] = None) -> None: self.data[key] = value + if value_type is not None: + self.schema.add(key, value_type) + else: + self.schema.add(key, FROM_PYOBJECT_MAPPING[type(value)]) def get(self, key: str) -> any: return self.data[key] - def from_dict(self, dictionary: dict) -> "State": - for key, value in dictionary.items(): - self.add(key, value) - return self - def to_table(self) -> Table: - return Table.from_pandas(df=DataFrame([self.data])) - - def from_table(self, table: Table) -> "State": - return self.from_dict(table.to_pandas().iloc[0].to_dict()) + return Table.from_pandas(df=DataFrame([self.data]), schema=self.schema.as_arrow_schema(),) - def __setitem__(self, key: str, value: any): - self.add(key, value) + def __setitem__(self, key: str, value: any, value_type: AttributeType) -> None: + self.add(key, value, value_type) def __getitem__(self, key: str) -> any: return self.get(key) diff --git a/core/amber/src/main/python/core/models/schema/attribute_type.py b/core/amber/src/main/python/core/models/schema/attribute_type.py index ee7c8afebcc..09636cae205 100644 --- a/core/amber/src/main/python/core/models/schema/attribute_type.py +++ b/core/amber/src/main/python/core/models/schema/attribute_type.py @@ -66,3 +66,13 @@ class AttributeType(Enum): AttributeType.BINARY: bytes, AttributeType.TIMESTAMP: datetime.datetime, } + +FROM_PYOBJECT_MAPPING = { + str: AttributeType.STRING, + int: AttributeType.INT, + float: AttributeType.DOUBLE, + bool: AttributeType.BOOL, + bytes: AttributeType.BINARY, + datetime.datetime: AttributeType.TIMESTAMP, +} + diff --git a/core/amber/src/main/python/core/runnables/network_receiver.py b/core/amber/src/main/python/core/runnables/network_receiver.py index 9efe8cb53aa..0aec42718da 100644 --- a/core/amber/src/main/python/core/runnables/network_receiver.py +++ b/core/amber/src/main/python/core/runnables/network_receiver.py @@ -67,7 +67,7 @@ def data_handler(command: bytes, table: Table) -> int: if payload_type == "Data": payload = DataFrame(table) elif payload_type == "State": - payload = MarkerFrame(State().from_table(table)) + payload = MarkerFrame(State(table)) elif payload_type == "StartOfUpstream": payload = MarkerFrame(StartOfUpstream()) elif payload_type == "EndOfUpstream": From 51dff7db8bdf202b1662f6bbad970c09691a4680 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Fri, 13 Sep 2024 20:42:53 -0700 Subject: [PATCH 094/163] update --- .../managers/tuple_processing_manager.py | 21 +++++++++-- .../architecture/packaging/input_manager.py | 8 +++-- .../python/core/models/internal_marker.py | 7 +++- .../src/main/python/core/models/operator.py | 9 +++-- .../python/core/runnables/data_processor.py | 36 +++++++++++-------- .../main/python/core/runnables/main_loop.py | 19 ++++++---- 6 files changed, 69 insertions(+), 31 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py index 1febc5ee1ed..68d35e45fb1 100644 --- a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py +++ b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py @@ -2,12 +2,13 @@ from typing import Optional, Union, Tuple, Iterator from core.models import InputExhausted -from core.models.marker import State +from core.models.marker import State, Marker from proto.edu.uci.ics.amber.engine.common import PortIdentity class TupleProcessingManager: def __init__(self): + self.current_input_marker: Optional[Marker] = None self.current_input_tuple: Optional[Union[Tuple, InputExhausted]] = None self.current_input_port_id: Optional[PortIdentity] = None self.current_input_tuple_iter: Optional[ @@ -23,10 +24,24 @@ def get_output_tuple(self) -> Optional[Tuple]: ret, self.current_output_tuple = self.current_output_tuple, None return ret + def get_input_state(self) -> Optional[State]: + ret, self.current_input_state = self.current_input_state, None + return ret + def get_output_state(self) -> Optional[State]: ret, self.current_output_state = self.current_output_state, None return ret - def get_input_state(self) -> Optional[State]: - ret, self.current_input_state = self.current_input_state, None + def get_input_marker(self) -> Optional[State]: + ret, self.current_input_marker = self.current_input_marker, None return ret + + def get_input_port(self) -> int: + port_id = self.current_input_port_id + port: int + if port_id is None: + # no upstream, special case for source executor. + port = 0 + else: + port = port_id.id + return port diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index 8dc9442767a..5e840db5abc 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -1,7 +1,7 @@ from typing import Iterator, Optional, Union, Dict, List from core.models import Tuple, ArrowTableTupleProvider, Schema, InputExhausted -from core.models.internal_marker import EndOfAll, InternalMarker, SenderChange, StartOfAll +from core.models.internal_marker import EndOfAll, InternalMarker, SenderChange, StartOfAny, InputInitialized from core.models.marker import EndOfUpstream, State, StartOfUpstream from core.models.payload import DataFrame, DataPayload, MarkerFrame from proto.edu.uci.ics.amber.engine.common import ( @@ -50,6 +50,7 @@ def __init__(self): self._ports: Dict[PortIdentity, WorkerPort] = dict() self._channels: Dict[ChannelIdentity, Channel] = dict() self._current_channel_id: Optional[ChannelIdentity] = None + self.started = False def add_input_port(self, port_id: PortIdentity, schema: Schema) -> None: if port_id.id is None: @@ -109,7 +110,10 @@ def process_data_payload( if isinstance(payload.frame, State): yield payload.frame if isinstance(payload.frame, StartOfUpstream): - yield StartOfAll() + if not self.started: + yield StartOfAny() + self.started = True + yield InputInitialized() if isinstance(payload.frame, EndOfUpstream): channel = self._channels[self._current_channel_id] channel.complete() diff --git a/core/amber/src/main/python/core/models/internal_marker.py b/core/amber/src/main/python/core/models/internal_marker.py index ba37b6e212b..30f76f866ff 100644 --- a/core/amber/src/main/python/core/models/internal_marker.py +++ b/core/amber/src/main/python/core/models/internal_marker.py @@ -19,14 +19,19 @@ class SenderChange(InternalMarker): channel_id: ChannelIdentity @dataclass -class StartOfAll(InternalMarker): +class StartOfAny(InternalMarker): pass @dataclass class EndOfAll(InternalMarker): pass +@dataclass +class InputInitialized(InternalMarker): + pass @dataclass class InputExhausted(InternalMarker): pass + + diff --git a/core/amber/src/main/python/core/models/operator.py b/core/amber/src/main/python/core/models/operator.py index 3a6a9c4a019..ce38bec7b07 100644 --- a/core/amber/src/main/python/core/models/operator.py +++ b/core/amber/src/main/python/core/models/operator.py @@ -48,15 +48,14 @@ def close(self) -> None: """ pass - def process_state(self, state: State, port: int) -> State: + def produce_state_on_start(self, port: int) -> State: """ - Process an input State from the given link. + Produce a State when the given link started. - :param state: State, a State from an input port to be processed. - :param port: int, input port index of the current exhausted port. + :param port: int, input port index of the current initialized port. :return: State, producing one State object """ - return state + pass def produce_state(self, port: int) -> State: """ diff --git a/core/amber/src/main/python/core/runnables/data_processor.py b/core/amber/src/main/python/core/runnables/data_processor.py index e5524ea33df..71bc4295d91 100644 --- a/core/amber/src/main/python/core/runnables/data_processor.py +++ b/core/amber/src/main/python/core/runnables/data_processor.py @@ -7,6 +7,7 @@ from core.architecture.managers import Context from core.models import Tuple, ExceptionInfo, State +from core.models.marker import Marker from core.models.table import all_output_to_tuple from core.util import Stoppable from core.util.console_message.replace_print import replace_print @@ -29,22 +30,35 @@ def run(self) -> None: self._running.set() self._switch_context() while self._running.is_set(): + marker = self._context.tuple_processing_manager.get_input_marker() + print("DataProcessor running:", marker) + if marker is not None: + self.process_marker(marker) self.process_state() self.process_tuple() self._switch_context() + def process_marker(self, marker: Marker) -> None: + try: + executor = self._context.executor_manager.executor + port = self._context.tuple_processing_manager.get_input_port() + self._set_output_state(executor.produce_state_on_start(port)) + + except Exception as err: + logger.exception(err) + exc_info = sys.exc_info() + self._context.exception_manager.set_exception_info(exc_info) + self._report_exception(exc_info) + + finally: + self._switch_context() + def process_state(self) -> None: state = self._context.tuple_processing_manager.get_input_state() if state is not None: try: executor = self._context.executor_manager.executor - port_id = self._context.tuple_processing_manager.current_input_port_id - port: int - if port_id is None: - # no upstream, special case for source executor. - port = 0 - else: - port = port_id.id + port = self._context.tuple_processing_manager.get_input_port() with replace_print( self._context.worker_id, @@ -67,13 +81,7 @@ def process_tuple(self) -> None: try: executor = self._context.executor_manager.executor tuple_ = self._context.tuple_processing_manager.current_input_tuple - port_id = self._context.tuple_processing_manager.current_input_port_id - port: int - if port_id is None: - # no upstream, special case for source executor. - port = 0 - else: - port = port_id.id + port = self._context.tuple_processing_manager.get_input_port() if isinstance(tuple_, Tuple): output_iterator = executor.process_tuple(tuple_, port) diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index 58d47662125..acda61cd5ca 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -18,7 +18,7 @@ SenderChange, Tuple, ) -from core.models.internal_marker import StartOfAll +from core.models.internal_marker import StartOfAny, InputInitialized from core.models.internal_queue import DataElement, ControlElement from core.models.marker import State, EndOfUpstream, StartOfUpstream from core.runnables.data_processor import DataProcessor @@ -211,6 +211,11 @@ def _process_state(self, state_: State): self._check_and_process_control() self._switch_context() + def _process_input_initialized(self, input_initialized: InputInitialized): + self.context.tuple_processing_manager.current_input_marker = input_initialized + self.process_input_tuple() + self._switch_context() + def _process_input_exhausted(self, input_exhausted: InputExhausted): self._process_tuple(input_exhausted) if self.context.tuple_processing_manager.current_input_port_id is not None: @@ -237,12 +242,12 @@ def _process_sender_change_marker(self, sender_change_marker: SenderChange) -> N ) - def _process_start_of_all_marker(self, _: StartOfAll) -> None: + def _process_start_of_any_marker(self, _: StartOfAny) -> None: """ - Upon receipt of an StartOfAllMarker, which indicates the start of all input links, + Upon receipt of an StartOfAllMarker, which indicates the start of any input links, send the StartOfUpstream to all downstream workers. - :param _: StartOfAll Internal Marker + :param _: StartOfAny Internal Marker """ for to, batch in self.context.output_manager.emit_marker(StartOfUpstream()): self._output_queue.put(DataElement(tag=to, payload=batch)) @@ -303,12 +308,14 @@ def _process_data_element(self, data_element: DataElement) -> None: element, Tuple, self._process_tuple, + InputInitialized, + self._process_input_initialized, InputExhausted, self._process_input_exhausted, SenderChange, self._process_sender_change_marker, - StartOfAll, - self._process_start_of_all_marker, + StartOfAny, + self._process_start_of_any_marker, EndOfAll, self._process_end_of_all_marker, State, From bbffbbe956c5fee66ff45c064441137a9a12a415 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sat, 14 Sep 2024 01:32:34 -0700 Subject: [PATCH 095/163] update --- .../core/architecture/packaging/input_manager.py | 4 ++-- .../main/python/core/models/internal_marker.py | 4 ---- .../src/main/python/core/runnables/main_loop.py | 16 ++++++++-------- 3 files changed, 10 insertions(+), 14 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index 5e840db5abc..a877d02309d 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -1,7 +1,7 @@ from typing import Iterator, Optional, Union, Dict, List from core.models import Tuple, ArrowTableTupleProvider, Schema, InputExhausted -from core.models.internal_marker import EndOfAll, InternalMarker, SenderChange, StartOfAny, InputInitialized +from core.models.internal_marker import EndOfAll, InternalMarker, SenderChange, StartOfAny from core.models.marker import EndOfUpstream, State, StartOfUpstream from core.models.payload import DataFrame, DataPayload, MarkerFrame from proto.edu.uci.ics.amber.engine.common import ( @@ -113,7 +113,7 @@ def process_data_payload( if not self.started: yield StartOfAny() self.started = True - yield InputInitialized() + yield StartOfUpstream() if isinstance(payload.frame, EndOfUpstream): channel = self._channels[self._current_channel_id] channel.complete() diff --git a/core/amber/src/main/python/core/models/internal_marker.py b/core/amber/src/main/python/core/models/internal_marker.py index 30f76f866ff..0beb2d209fd 100644 --- a/core/amber/src/main/python/core/models/internal_marker.py +++ b/core/amber/src/main/python/core/models/internal_marker.py @@ -26,10 +26,6 @@ class StartOfAny(InternalMarker): class EndOfAll(InternalMarker): pass -@dataclass -class InputInitialized(InternalMarker): - pass - @dataclass class InputExhausted(InternalMarker): pass diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index acda61cd5ca..68138850b5b 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -18,7 +18,7 @@ SenderChange, Tuple, ) -from core.models.internal_marker import StartOfAny, InputInitialized +from core.models.internal_marker import StartOfAny from core.models.internal_queue import DataElement, ControlElement from core.models.marker import State, EndOfUpstream, StartOfUpstream from core.runnables.data_processor import DataProcessor @@ -149,7 +149,7 @@ def process_control_payload( end_time - self.context.statistics_manager.worker_start_time ) - def process_input_tuple(self) -> None: + def process_input(self) -> None: """ Process the current input tuple with the current input link. Send all result Tuples to downstream workers. @@ -203,7 +203,7 @@ def _process_control_element(self, control_element: ControlElement) -> None: def _process_tuple(self, tuple_: Union[Tuple, InputExhausted]) -> None: self.context.tuple_processing_manager.current_input_tuple = tuple_ - self.process_input_tuple() + self.process_input() self._check_and_process_control() def _process_state(self, state_: State): @@ -211,9 +211,9 @@ def _process_state(self, state_: State): self._check_and_process_control() self._switch_context() - def _process_input_initialized(self, input_initialized: InputInitialized): - self.context.tuple_processing_manager.current_input_marker = input_initialized - self.process_input_tuple() + def _process_start_of_upstream(self, start_of_upstream: StartOfUpstream): + self.context.tuple_processing_manager.current_input_marker = start_of_upstream + self.process_input() self._switch_context() def _process_input_exhausted(self, input_exhausted: InputExhausted): @@ -308,8 +308,8 @@ def _process_data_element(self, data_element: DataElement) -> None: element, Tuple, self._process_tuple, - InputInitialized, - self._process_input_initialized, + StartOfUpstream, + self._process_start_of_upstream, InputExhausted, self._process_input_exhausted, SenderChange, From e0d60567a04cef27c8e2089d42a698464b704995 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sat, 14 Sep 2024 01:34:11 -0700 Subject: [PATCH 096/163] update --- core/amber/src/main/python/core/runnables/main_loop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index 68138850b5b..c7eb030e487 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -151,8 +151,8 @@ def process_control_payload( def process_input(self) -> None: """ - Process the current input tuple with the current input link. Send all result - Tuples to downstream workers. + Process the current input tuple or state with the current input link. Send all result + Tuples or State to downstream workers. This is being invoked for each Tuple/Marker that are unpacked from the DataElement. From fdbc91f806061be45611b5fee866f48d8b1eb8d3 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sat, 14 Sep 2024 06:29:53 -0700 Subject: [PATCH 097/163] update --- .../architecture/managers/tuple_processing_manager.py | 2 +- core/amber/src/main/python/core/models/operator.py | 2 +- .../src/main/python/core/runnables/data_processor.py | 7 ++++--- core/amber/src/main/python/core/runnables/main_loop.py | 10 +++++----- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py index 68d35e45fb1..804b0d246dc 100644 --- a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py +++ b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py @@ -11,7 +11,7 @@ def __init__(self): self.current_input_marker: Optional[Marker] = None self.current_input_tuple: Optional[Union[Tuple, InputExhausted]] = None self.current_input_port_id: Optional[PortIdentity] = None - self.current_input_tuple_iter: Optional[ + self.current_input_iter: Optional[ Iterator[Union[Tuple, InputExhausted]] ] = None self.current_input_state: Optional[State] = None diff --git a/core/amber/src/main/python/core/models/operator.py b/core/amber/src/main/python/core/models/operator.py index ce38bec7b07..36c0dc1261e 100644 --- a/core/amber/src/main/python/core/models/operator.py +++ b/core/amber/src/main/python/core/models/operator.py @@ -57,7 +57,7 @@ def produce_state_on_start(self, port: int) -> State: """ pass - def produce_state(self, port: int) -> State: + def produce_state_on_finish(self, port: int) -> State: """ Produce a State after the input port is exhausted. diff --git a/core/amber/src/main/python/core/runnables/data_processor.py b/core/amber/src/main/python/core/runnables/data_processor.py index 71bc4295d91..649bbe38c1d 100644 --- a/core/amber/src/main/python/core/runnables/data_processor.py +++ b/core/amber/src/main/python/core/runnables/data_processor.py @@ -7,7 +7,7 @@ from core.architecture.managers import Context from core.models import Tuple, ExceptionInfo, State -from core.models.marker import Marker +from core.models.marker import Marker, StartOfUpstream from core.models.table import all_output_to_tuple from core.util import Stoppable from core.util.console_message.replace_print import replace_print @@ -42,7 +42,8 @@ def process_marker(self, marker: Marker) -> None: try: executor = self._context.executor_manager.executor port = self._context.tuple_processing_manager.get_input_port() - self._set_output_state(executor.produce_state_on_start(port)) + if isinstance(marker, StartOfUpstream): + self._set_output_state(executor.produce_state_on_start(port)) except Exception as err: logger.exception(err) @@ -86,7 +87,7 @@ def process_tuple(self) -> None: if isinstance(tuple_, Tuple): output_iterator = executor.process_tuple(tuple_, port) else: - self._set_output_state(executor.produce_state(port)) + self._set_output_state(executor.produce_state_on_finish(port)) output_iterator = executor.on_finish(port) with replace_print( self._context.worker_id, diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index c7eb030e487..ce3b60eddf3 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -175,13 +175,13 @@ def process_input(self) -> None: for (to, batch) in self.context.output_manager.emit_marker(output_data): self._output_queue.put(DataElement(tag=to, payload=batch)) - def process_tuple_with_udf(self) -> Iterator[Optional[Tuple]]: + def process_tuple_with_udf(self) -> Union[Iterator[Optional[Tuple]], State]: """ Process the Tuple/InputExhausted with the current link. This is a wrapper to invoke processing of the executor. - :return: Iterator[Tuple], iterator of result Tuple(s). + :return: Iterator[Tuple], iterator of result Tuple(s) or State. """ finished_current = self.context.tuple_processing_manager.finished_current finished_current.clear() @@ -286,13 +286,13 @@ def _process_data_element(self, data_element: DataElement) -> None: if self.context.state_manager.confirm_state(WorkerState.READY): self.context.state_manager.transit_to(WorkerState.RUNNING) - self.context.tuple_processing_manager.current_input_tuple_iter = ( + self.context.tuple_processing_manager.current_input_iter = ( self.context.input_manager.process_data_payload( data_element.tag, data_element.payload ) ) - if self.context.tuple_processing_manager.current_input_tuple_iter is None: + if self.context.tuple_processing_manager.current_input_iter is None: return # here the self.context.processing_manager.current_input_tuple_iter # could be modified during iteration, thus we are using the while := @@ -300,7 +300,7 @@ def _process_data_element(self, data_element: DataElement) -> None: # syntax sugar. while ( element := next( - self.context.tuple_processing_manager.current_input_tuple_iter, None + self.context.tuple_processing_manager.current_input_iter, None ) ) is not None: try: From e5c9cf542cd4b08e87d23282d7c8e979cea3a041 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sat, 14 Sep 2024 06:35:08 -0700 Subject: [PATCH 098/163] update --- .../handlers/control/replay_current_tuple_handler.py | 4 ++-- core/amber/src/main/python/core/runnables/main_loop.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/handlers/control/replay_current_tuple_handler.py b/core/amber/src/main/python/core/architecture/handlers/control/replay_current_tuple_handler.py index 8efd04e9ee7..c9c3108f0fb 100644 --- a/core/amber/src/main/python/core/architecture/handlers/control/replay_current_tuple_handler.py +++ b/core/amber/src/main/python/core/architecture/handlers/control/replay_current_tuple_handler.py @@ -16,9 +16,9 @@ def __call__(self, context: Context, command: cmd, *args, **kwargs): if not context.state_manager.confirm_state(WorkerState.COMPLETED): # chain the current input tuple back on top of the current iterator to # be processed once more - context.tuple_processing_manager.current_input_tuple_iter = itertools.chain( + context.tuple_processing_manager.current_input_iter = itertools.chain( [context.tuple_processing_manager.current_input_tuple], - context.tuple_processing_manager.current_input_tuple_iter, + context.tuple_processing_manager.current_input_iter, ) context.pause_manager.resume(PauseType.EXCEPTION_PAUSE) return None diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index ce3b60eddf3..8136ff5b9b2 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -294,7 +294,7 @@ def _process_data_element(self, data_element: DataElement) -> None: if self.context.tuple_processing_manager.current_input_iter is None: return - # here the self.context.processing_manager.current_input_tuple_iter + # here the self.context.processing_manager.current_input_iter # could be modified during iteration, thus we are using the while := # way to iterate through the iterator, instead of the for-each-loop # syntax sugar. From 20f9c02a5068892885a72300324f4d28296ea8ba Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sat, 14 Sep 2024 07:15:59 -0700 Subject: [PATCH 099/163] update --- .../managers/tuple_processing_manager.py | 9 +----- .../architecture/packaging/output_manager.py | 17 +++++------ .../src/main/python/core/models/marker.py | 2 +- .../src/main/python/core/models/operator.py | 9 ++++++ .../python/core/runnables/data_processor.py | 29 +++---------------- .../main/python/core/runnables/main_loop.py | 2 +- 6 files changed, 24 insertions(+), 44 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py index 804b0d246dc..8623229d59c 100644 --- a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py +++ b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py @@ -11,10 +11,7 @@ def __init__(self): self.current_input_marker: Optional[Marker] = None self.current_input_tuple: Optional[Union[Tuple, InputExhausted]] = None self.current_input_port_id: Optional[PortIdentity] = None - self.current_input_iter: Optional[ - Iterator[Union[Tuple, InputExhausted]] - ] = None - self.current_input_state: Optional[State] = None + self.current_input_iter: Optional[Iterator[Union[Tuple, InputExhausted]]] = None self.current_output_state: Optional[State] = None self.current_output_tuple: Optional[Tuple] = None self.context_switch_condition: Condition = Condition() @@ -24,10 +21,6 @@ def get_output_tuple(self) -> Optional[Tuple]: ret, self.current_output_tuple = self.current_output_tuple, None return ret - def get_input_state(self) -> Optional[State]: - ret, self.current_input_state = self.current_input_state, None - return ret - def get_output_state(self) -> Optional[State]: ret, self.current_output_state = self.current_output_state, None return ret diff --git a/core/amber/src/main/python/core/architecture/packaging/output_manager.py b/core/amber/src/main/python/core/architecture/packaging/output_manager.py index f60e44e75fe..42cfc925d47 100644 --- a/core/amber/src/main/python/core/architecture/packaging/output_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/output_manager.py @@ -2,27 +2,26 @@ from collections import OrderedDict from itertools import chain from loguru import logger -from typing import Iterable, Iterator - from pyarrow import Table +from typing import Iterable, Iterator from core.architecture.packaging.input_manager import WorkerPort, Channel +from core.architecture.sendsemantics.broad_cast_partitioner import ( + BroadcastPartitioner, +) from core.architecture.sendsemantics.hash_based_shuffle_partitioner import ( HashBasedShufflePartitioner, ) +from core.architecture.sendsemantics.one_to_one_partitioner import OneToOnePartitioner +from core.architecture.sendsemantics.partitioner import Partitioner from core.architecture.sendsemantics.range_based_shuffle_partitioner import ( RangeBasedShufflePartitioner, ) -from core.architecture.sendsemantics.one_to_one_partitioner import OneToOnePartitioner -from core.architecture.sendsemantics.partitioner import Partitioner from core.architecture.sendsemantics.round_robin_partitioner import ( RoundRobinPartitioner, ) -from core.architecture.sendsemantics.broad_cast_partitioner import ( - BroadcastPartitioner, -) -from core.models import Tuple, Schema, MarkerFrame, State -from core.models.marker import EndOfUpstream, Marker +from core.models import Tuple, Schema, MarkerFrame +from core.models.marker import Marker from core.models.payload import DataPayload, DataFrame from core.util import get_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import ( diff --git a/core/amber/src/main/python/core/models/marker.py b/core/amber/src/main/python/core/models/marker.py index 0fd1130e4d1..7a679802624 100644 --- a/core/amber/src/main/python/core/models/marker.py +++ b/core/amber/src/main/python/core/models/marker.py @@ -26,7 +26,7 @@ def __init__(self, table: Optional[Table] = None): self.schema = Schema() else: self.data = table.to_pandas().iloc[0].to_dict() - self.schema = table.schema + self.schema = Schema(table.schema) def add(self, key: str, value: any, value_type: Optional[AttributeType] = None) -> None: self.data[key] = value diff --git a/core/amber/src/main/python/core/models/operator.py b/core/amber/src/main/python/core/models/operator.py index 36c0dc1261e..4c725c9eddb 100644 --- a/core/amber/src/main/python/core/models/operator.py +++ b/core/amber/src/main/python/core/models/operator.py @@ -48,6 +48,15 @@ def close(self) -> None: """ pass + def process_state(self, state_: State, port: int) -> State: + """ + Process an input State from the given link. + :param state_: State, a State from an input port to be processed. + :param port: int, input port index of the current exhausted port. + :return: State, producing one State object + """ + return state_ + def produce_state_on_start(self, port: int) -> State: """ Produce a State when the given link started. diff --git a/core/amber/src/main/python/core/runnables/data_processor.py b/core/amber/src/main/python/core/runnables/data_processor.py index 649bbe38c1d..37c00da9194 100644 --- a/core/amber/src/main/python/core/runnables/data_processor.py +++ b/core/amber/src/main/python/core/runnables/data_processor.py @@ -31,11 +31,10 @@ def run(self) -> None: self._switch_context() while self._running.is_set(): marker = self._context.tuple_processing_manager.get_input_marker() - print("DataProcessor running:", marker) if marker is not None: self.process_marker(marker) - self.process_state() - self.process_tuple() + else: + self.process_tuple() self._switch_context() def process_marker(self, marker: Marker) -> None: @@ -44,6 +43,8 @@ def process_marker(self, marker: Marker) -> None: port = self._context.tuple_processing_manager.get_input_port() if isinstance(marker, StartOfUpstream): self._set_output_state(executor.produce_state_on_start(port)) + elif isinstance(marker, State): + self._set_output_state(executor.process_state(marker, port)) except Exception as err: logger.exception(err) @@ -54,28 +55,6 @@ def process_marker(self, marker: Marker) -> None: finally: self._switch_context() - def process_state(self) -> None: - state = self._context.tuple_processing_manager.get_input_state() - if state is not None: - try: - executor = self._context.executor_manager.executor - port = self._context.tuple_processing_manager.get_input_port() - - with replace_print( - self._context.worker_id, - self._context.console_message_manager.print_buf, - ): - self._set_output_state(executor.process_state(state, port)) - - except Exception as err: - logger.exception(err) - exc_info = sys.exc_info() - self._context.exception_manager.set_exception_info(exc_info) - self._report_exception(exc_info) - - finally: - self._switch_context() - def process_tuple(self) -> None: finished_current = self._context.tuple_processing_manager.finished_current while not finished_current.is_set(): diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index 8136ff5b9b2..e6dc3d28779 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -207,7 +207,7 @@ def _process_tuple(self, tuple_: Union[Tuple, InputExhausted]) -> None: self._check_and_process_control() def _process_state(self, state_: State): - self.context.tuple_processing_manager.current_input_state = state_ + self.context.tuple_processing_manager.current_input_marker = state_ self._check_and_process_control() self._switch_context() From f9c201b9da7973c11972092ac7a58840f7bfed40 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sat, 14 Sep 2024 07:54:16 -0700 Subject: [PATCH 100/163] update --- .../managers/tuple_processing_manager.py | 5 ++-- .../architecture/packaging/input_manager.py | 8 +++---- .../src/main/python/core/models/__init__.py | 3 +-- .../python/core/models/internal_marker.py | 6 ----- .../src/main/python/core/models/operator.py | 12 +++------- .../python/core/runnables/data_processor.py | 23 +++++++++++++++---- .../main/python/core/runnables/main_loop.py | 12 +++++----- .../amber/src/main/python/pyamber/__init__.py | 2 -- .../src/main/python/pytexera/__init__.py | 1 - .../main/python/pytexera/udf/udf_operator.py | 3 +-- 10 files changed, 35 insertions(+), 40 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py index 8623229d59c..25b64bc6a2d 100644 --- a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py +++ b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py @@ -1,7 +1,6 @@ from threading import Event, Condition from typing import Optional, Union, Tuple, Iterator -from core.models import InputExhausted from core.models.marker import State, Marker from proto.edu.uci.ics.amber.engine.common import PortIdentity @@ -9,9 +8,9 @@ class TupleProcessingManager: def __init__(self): self.current_input_marker: Optional[Marker] = None - self.current_input_tuple: Optional[Union[Tuple, InputExhausted]] = None + self.current_input_tuple: Optional[Tuple] = None self.current_input_port_id: Optional[PortIdentity] = None - self.current_input_iter: Optional[Iterator[Union[Tuple, InputExhausted]]] = None + self.current_input_iter: Optional[Iterator[Union[Tuple, Marker]]] = None self.current_output_state: Optional[State] = None self.current_output_tuple: Optional[Tuple] = None self.context_switch_condition: Condition = Condition() diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index a877d02309d..75402ba6853 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -1,6 +1,6 @@ from typing import Iterator, Optional, Union, Dict, List -from core.models import Tuple, ArrowTableTupleProvider, Schema, InputExhausted +from core.models import Tuple, ArrowTableTupleProvider, Schema from core.models.internal_marker import EndOfAll, InternalMarker, SenderChange, StartOfAny from core.models.marker import EndOfUpstream, State, StartOfUpstream from core.models.payload import DataFrame, DataPayload, MarkerFrame @@ -79,10 +79,10 @@ def register_input( def process_data_payload( self, from_: ActorVirtualIdentity, payload: DataPayload - ) -> Iterator[Union[Tuple, InputExhausted, InternalMarker]]: + ) -> Iterator[Union[Tuple, EndOfUpstream, InternalMarker]]: # special case used to yield for source op if from_ == InputManager.SOURCE_STARTER: - yield InputExhausted() + yield EndOfUpstream() yield EndOfAll() return current_channel_id = None @@ -126,7 +126,7 @@ def process_data_payload( ) if port_completed: - yield InputExhausted() + yield EndOfUpstream() all_ports_completed = all( map(lambda port: port.is_completed(), self._ports.values()) diff --git a/core/amber/src/main/python/core/models/__init__.py b/core/amber/src/main/python/core/models/__init__.py index bcb71c16977..c39a55ac88c 100644 --- a/core/amber/src/main/python/core/models/__init__.py +++ b/core/amber/src/main/python/core/models/__init__.py @@ -2,7 +2,7 @@ from typing import NamedTuple from .internal_queue import InternalQueue -from .internal_marker import EndOfAll, InternalMarker, SenderChange, InputExhausted +from .internal_marker import EndOfAll, InternalMarker, SenderChange from .tuple import Tuple, TupleLike, ArrowTableTupleProvider from .table import Table, TableLike from .batch import Batch, BatchLike @@ -30,7 +30,6 @@ class ExceptionInfo(NamedTuple): "EndOfAll", "InternalMarker", "SenderChange", - "InputExhausted", "Tuple", "TupleLike", "ArrowTableTupleProvider", diff --git a/core/amber/src/main/python/core/models/internal_marker.py b/core/amber/src/main/python/core/models/internal_marker.py index 0beb2d209fd..551f9330b65 100644 --- a/core/amber/src/main/python/core/models/internal_marker.py +++ b/core/amber/src/main/python/core/models/internal_marker.py @@ -25,9 +25,3 @@ class StartOfAny(InternalMarker): @dataclass class EndOfAll(InternalMarker): pass - -@dataclass -class InputExhausted(InternalMarker): - pass - - diff --git a/core/amber/src/main/python/core/models/operator.py b/core/amber/src/main/python/core/models/operator.py index 4c725c9eddb..5ffd3b7c119 100644 --- a/core/amber/src/main/python/core/models/operator.py +++ b/core/amber/src/main/python/core/models/operator.py @@ -7,7 +7,7 @@ from deprecated import deprecated -from . import InputExhausted, Table, TableLike, Tuple, TupleLike, Batch, BatchLike +from . import Table, TableLike, Tuple, TupleLike, Batch, BatchLike from .marker import State from .table import all_output_to_tuple @@ -244,7 +244,7 @@ class TupleOperator(Operator): @abstractmethod def process_tuple( - self, tuple_: Union[Tuple, InputExhausted], input_: int + self, tuple_: Tuple, input_: int ) -> Iterator[Optional[TupleLike]]: """ Process an input Tuple from the given link. @@ -256,10 +256,4 @@ def process_tuple( :return: Iterator[Optional[TupleLike]], producing one TupleLike object at a time, or None. """ - yield - - def on_finish(self, port: int) -> Iterator[Optional[TupleLike]]: - """ - For backward compatibility. - """ - yield from self.process_tuple(InputExhausted(), input_=port) + yield \ No newline at end of file diff --git a/core/amber/src/main/python/core/runnables/data_processor.py b/core/amber/src/main/python/core/runnables/data_processor.py index 37c00da9194..07f92d2ff5c 100644 --- a/core/amber/src/main/python/core/runnables/data_processor.py +++ b/core/amber/src/main/python/core/runnables/data_processor.py @@ -7,7 +7,7 @@ from core.architecture.managers import Context from core.models import Tuple, ExceptionInfo, State -from core.models.marker import Marker, StartOfUpstream +from core.models.marker import Marker, StartOfUpstream, EndOfUpstream from core.models.table import all_output_to_tuple from core.util import Stoppable from core.util.console_message.replace_print import replace_print @@ -41,10 +41,23 @@ def process_marker(self, marker: Marker) -> None: try: executor = self._context.executor_manager.executor port = self._context.tuple_processing_manager.get_input_port() - if isinstance(marker, StartOfUpstream): - self._set_output_state(executor.produce_state_on_start(port)) - elif isinstance(marker, State): - self._set_output_state(executor.process_state(marker, port)) + with replace_print( + self._context.worker_id, + self._context.console_message_manager.print_buf, + ): + if isinstance(marker, StartOfUpstream): + self._set_output_state(executor.produce_state_on_start(port)) + elif isinstance(marker, State): + self._set_output_state(executor.process_state(marker, port)) + elif isinstance(marker, EndOfUpstream): + output_iterator = executor.on_finish(port) + for output in output_iterator: + # output could be a None, a TupleLike, or a TableLike. + for output_tuple in all_output_to_tuple(output): + self._set_output_tuple(output_tuple) + self._switch_context() + finished_current.set() + except Exception as err: logger.exception(err) diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index e6dc3d28779..bb2143e409a 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -13,7 +13,6 @@ from core.architecture.rpc.async_rpc_client import AsyncRPCClient from core.architecture.rpc.async_rpc_server import AsyncRPCServer from core.models import ( - InputExhausted, InternalQueue, SenderChange, Tuple, @@ -201,7 +200,7 @@ def _process_control_element(self, control_element: ControlElement) -> None: """ self.process_control_payload(control_element.tag, control_element.payload) - def _process_tuple(self, tuple_: Union[Tuple, InputExhausted]) -> None: + def _process_tuple(self, tuple_: Tuple) -> None: self.context.tuple_processing_manager.current_input_tuple = tuple_ self.process_input() self._check_and_process_control() @@ -216,8 +215,9 @@ def _process_start_of_upstream(self, start_of_upstream: StartOfUpstream): self.process_input() self._switch_context() - def _process_input_exhausted(self, input_exhausted: InputExhausted): - self._process_tuple(input_exhausted) + def _process_end_of_upstream(self, end_of_upstream: EndOfUpstream): + self.context.tuple_processing_manager.current_input_marker = end_of_upstream + self._switch_context() if self.context.tuple_processing_manager.current_input_port_id is not None: control_command = set_one_of( ControlCommandV2, @@ -310,8 +310,8 @@ def _process_data_element(self, data_element: DataElement) -> None: self._process_tuple, StartOfUpstream, self._process_start_of_upstream, - InputExhausted, - self._process_input_exhausted, + EndOfUpstream, + self._process_end_of_upstream, SenderChange, self._process_sender_change_marker, StartOfAny, diff --git a/core/amber/src/main/python/pyamber/__init__.py b/core/amber/src/main/python/pyamber/__init__.py index b57b2924313..ab2b9750f32 100644 --- a/core/amber/src/main/python/pyamber/__init__.py +++ b/core/amber/src/main/python/pyamber/__init__.py @@ -1,5 +1,4 @@ from core.models import ( - InputExhausted, Tuple, TupleLike, TupleOperator, @@ -15,7 +14,6 @@ ) __all__ = [ - "InputExhausted", "Tuple", "TupleLike", "TupleOperator", diff --git a/core/amber/src/main/python/pytexera/__init__.py b/core/amber/src/main/python/pytexera/__init__.py index 99099c21312..c2bcbe43c7d 100644 --- a/core/amber/src/main/python/pytexera/__init__.py +++ b/core/amber/src/main/python/pytexera/__init__.py @@ -13,7 +13,6 @@ __all__ = [ "State", - "InputExhausted", "Tuple", "TupleLike", "UDFOperator", diff --git a/core/amber/src/main/python/pytexera/udf/udf_operator.py b/core/amber/src/main/python/pytexera/udf/udf_operator.py index cc6ed9d9ecd..4fd7fb042e5 100644 --- a/core/amber/src/main/python/pytexera/udf/udf_operator.py +++ b/core/amber/src/main/python/pytexera/udf/udf_operator.py @@ -2,7 +2,6 @@ from typing import Iterator, Optional, Union from deprecated import deprecated -from core.models import InputExhausted from pyamber import * @@ -17,7 +16,7 @@ def open(self) -> None: @abstractmethod def process_tuple( - self, tuple_: Union[Tuple, InputExhausted], input_: int + self, tuple_: Tuple, input_: int ) -> Iterator[Optional[TupleLike]]: """ Process an input Tuple from the given link. From 5254aa71d3a6cd72ae960a40b2f8ad4ef792dbe6 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sat, 14 Sep 2024 08:01:11 -0700 Subject: [PATCH 101/163] update --- .../managers/tuple_processing_manager.py | 8 ++--- .../python/core/runnables/data_processor.py | 30 +++++++++---------- .../main/python/core/runnables/main_loop.py | 1 - 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py index 25b64bc6a2d..999a2b57e4b 100644 --- a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py +++ b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py @@ -16,6 +16,10 @@ def __init__(self): self.context_switch_condition: Condition = Condition() self.finished_current: Event = Event() + def get_input_marker(self) -> Optional[State]: + ret, self.current_input_marker = self.current_input_marker, None + return ret + def get_output_tuple(self) -> Optional[Tuple]: ret, self.current_output_tuple = self.current_output_tuple, None return ret @@ -24,10 +28,6 @@ def get_output_state(self) -> Optional[State]: ret, self.current_output_state = self.current_output_state, None return ret - def get_input_marker(self) -> Optional[State]: - ret, self.current_input_marker = self.current_input_marker, None - return ret - def get_input_port(self) -> int: port_id = self.current_input_port_id port: int diff --git a/core/amber/src/main/python/core/runnables/data_processor.py b/core/amber/src/main/python/core/runnables/data_processor.py index 07f92d2ff5c..3b1221c3f78 100644 --- a/core/amber/src/main/python/core/runnables/data_processor.py +++ b/core/amber/src/main/python/core/runnables/data_processor.py @@ -41,22 +41,19 @@ def process_marker(self, marker: Marker) -> None: try: executor = self._context.executor_manager.executor port = self._context.tuple_processing_manager.get_input_port() - with replace_print( - self._context.worker_id, - self._context.console_message_manager.print_buf, - ): - if isinstance(marker, StartOfUpstream): - self._set_output_state(executor.produce_state_on_start(port)) - elif isinstance(marker, State): - self._set_output_state(executor.process_state(marker, port)) - elif isinstance(marker, EndOfUpstream): - output_iterator = executor.on_finish(port) - for output in output_iterator: - # output could be a None, a TupleLike, or a TableLike. - for output_tuple in all_output_to_tuple(output): - self._set_output_tuple(output_tuple) - self._switch_context() - finished_current.set() + + if isinstance(marker, StartOfUpstream): + self._set_output_state(executor.produce_state_on_start(port)) + elif isinstance(marker, State): + self._set_output_state(executor.process_state(marker, port)) + elif isinstance(marker, EndOfUpstream): + print("here!!") + output_iterator = executor.on_finish(port) + for output in output_iterator: + # output could be a None, a TupleLike, or a TableLike. + for output_tuple in all_output_to_tuple(output): + self._set_output_tuple(output_tuple) + self._switch_context() except Exception as err: @@ -79,6 +76,7 @@ def process_tuple(self) -> None: if isinstance(tuple_, Tuple): output_iterator = executor.process_tuple(tuple_, port) else: + print("here2!!", tuple_) self._set_output_state(executor.produce_state_on_finish(port)) output_iterator = executor.on_finish(port) with replace_print( diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index bb2143e409a..c465c1bb212 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -217,7 +217,6 @@ def _process_start_of_upstream(self, start_of_upstream: StartOfUpstream): def _process_end_of_upstream(self, end_of_upstream: EndOfUpstream): self.context.tuple_processing_manager.current_input_marker = end_of_upstream - self._switch_context() if self.context.tuple_processing_manager.current_input_port_id is not None: control_command = set_one_of( ControlCommandV2, From 067247f5b529052a509e1c15d653acc36211e8ba Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sat, 14 Sep 2024 19:03:47 -0700 Subject: [PATCH 102/163] update --- core/amber/src/main/python/core/runnables/data_processor.py | 6 +++--- core/amber/src/main/python/core/runnables/main_loop.py | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/core/amber/src/main/python/core/runnables/data_processor.py b/core/amber/src/main/python/core/runnables/data_processor.py index 3b1221c3f78..f4accb18e21 100644 --- a/core/amber/src/main/python/core/runnables/data_processor.py +++ b/core/amber/src/main/python/core/runnables/data_processor.py @@ -48,6 +48,7 @@ def process_marker(self, marker: Marker) -> None: self._set_output_state(executor.process_state(marker, port)) elif isinstance(marker, EndOfUpstream): print("here!!") + self._set_output_state(executor.produce_state_on_finish(port)) output_iterator = executor.on_finish(port) for output in output_iterator: # output could be a None, a TupleLike, or a TableLike. @@ -72,13 +73,12 @@ def process_tuple(self) -> None: executor = self._context.executor_manager.executor tuple_ = self._context.tuple_processing_manager.current_input_tuple port = self._context.tuple_processing_manager.get_input_port() - + print("here1!!", tuple_) if isinstance(tuple_, Tuple): output_iterator = executor.process_tuple(tuple_, port) else: print("here2!!", tuple_) - self._set_output_state(executor.produce_state_on_finish(port)) - output_iterator = executor.on_finish(port) + with replace_print( self._context.worker_id, self._context.console_message_manager.print_buf, diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index c465c1bb212..06309cd8b5b 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -217,6 +217,8 @@ def _process_start_of_upstream(self, start_of_upstream: StartOfUpstream): def _process_end_of_upstream(self, end_of_upstream: EndOfUpstream): self.context.tuple_processing_manager.current_input_marker = end_of_upstream + self._check_and_process_control() + self._switch_context() if self.context.tuple_processing_manager.current_input_port_id is not None: control_command = set_one_of( ControlCommandV2, From 13eafe0363115719fe43a7834835bc16aca18a11 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sun, 15 Sep 2024 04:13:53 -0700 Subject: [PATCH 103/163] update --- .../control/replay_current_tuple_handler.py | 4 +- .../core/architecture/managers/context.py | 2 + .../managers/marker_processing_manager.py | 15 ++++ .../managers/tuple_processing_manager.py | 12 +--- .../architecture/packaging/input_manager.py | 13 ++-- .../python/core/runnables/data_processor.py | 68 ++++++++----------- .../main/python/core/runnables/main_loop.py | 50 +++++++------- .../architecture/worker/DataProcessor.scala | 6 +- 8 files changed, 87 insertions(+), 83 deletions(-) create mode 100644 core/amber/src/main/python/core/architecture/managers/marker_processing_manager.py diff --git a/core/amber/src/main/python/core/architecture/handlers/control/replay_current_tuple_handler.py b/core/amber/src/main/python/core/architecture/handlers/control/replay_current_tuple_handler.py index c9c3108f0fb..8efd04e9ee7 100644 --- a/core/amber/src/main/python/core/architecture/handlers/control/replay_current_tuple_handler.py +++ b/core/amber/src/main/python/core/architecture/handlers/control/replay_current_tuple_handler.py @@ -16,9 +16,9 @@ def __call__(self, context: Context, command: cmd, *args, **kwargs): if not context.state_manager.confirm_state(WorkerState.COMPLETED): # chain the current input tuple back on top of the current iterator to # be processed once more - context.tuple_processing_manager.current_input_iter = itertools.chain( + context.tuple_processing_manager.current_input_tuple_iter = itertools.chain( [context.tuple_processing_manager.current_input_tuple], - context.tuple_processing_manager.current_input_iter, + context.tuple_processing_manager.current_input_tuple_iter, ) context.pause_manager.resume(PauseType.EXCEPTION_PAUSE) return None diff --git a/core/amber/src/main/python/core/architecture/managers/context.py b/core/amber/src/main/python/core/architecture/managers/context.py index 4236ccb2b3a..23d6023e84f 100644 --- a/core/amber/src/main/python/core/architecture/managers/context.py +++ b/core/amber/src/main/python/core/architecture/managers/context.py @@ -2,6 +2,7 @@ from .console_message_manager import ConsoleMessageManager from .debug_manager import DebugManager from .exception_manager import ExceptionManager +from .marker_processing_manager import MarkerProcessingManager from .tuple_processing_manager import TupleProcessingManager from .executor_manager import ExecutorManager from .pause_manager import PauseManager @@ -26,6 +27,7 @@ def __init__(self, worker_id, input_queue): self.input_queue: InternalQueue = input_queue self.executor_manager = ExecutorManager() self.tuple_processing_manager = TupleProcessingManager() + self.marker_processing_manager = MarkerProcessingManager() self.exception_manager = ExceptionManager() self.state_manager = StateManager( { diff --git a/core/amber/src/main/python/core/architecture/managers/marker_processing_manager.py b/core/amber/src/main/python/core/architecture/managers/marker_processing_manager.py new file mode 100644 index 00000000000..80ceb54a0f0 --- /dev/null +++ b/core/amber/src/main/python/core/architecture/managers/marker_processing_manager.py @@ -0,0 +1,15 @@ +from typing import Optional +from core.models.marker import State, Marker + +class MarkerProcessingManager: + def __init__(self): + self.current_input_marker: Optional[Marker] = None + self.current_output_state: Optional[State] = None + + def get_input_marker(self) -> Optional[State]: + ret, self.current_input_marker = self.current_input_marker, None + return ret + + def get_output_state(self) -> Optional[State]: + ret, self.current_output_state = self.current_output_state, None + return ret \ No newline at end of file diff --git a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py index 999a2b57e4b..fcc9ce88552 100644 --- a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py +++ b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py @@ -7,27 +7,21 @@ class TupleProcessingManager: def __init__(self): - self.current_input_marker: Optional[Marker] = None self.current_input_tuple: Optional[Tuple] = None self.current_input_port_id: Optional[PortIdentity] = None - self.current_input_iter: Optional[Iterator[Union[Tuple, Marker]]] = None - self.current_output_state: Optional[State] = None + self.current_input_tuple_iter: Optional[Iterator[Tuple]] = None self.current_output_tuple: Optional[Tuple] = None self.context_switch_condition: Condition = Condition() self.finished_current: Event = Event() - def get_input_marker(self) -> Optional[State]: - ret, self.current_input_marker = self.current_input_marker, None + def get_input_tuple(self) -> Optional[State]: + ret, self.current_input_tuple = self.current_input_tuple, None return ret def get_output_tuple(self) -> Optional[Tuple]: ret, self.current_output_tuple = self.current_output_tuple, None return ret - def get_output_state(self) -> Optional[State]: - ret, self.current_output_state = self.current_output_state, None - return ret - def get_input_port(self) -> int: port_id = self.current_input_port_id port: int diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index 75402ba6853..564fca4e549 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -96,7 +96,6 @@ def process_data_payload( ): self._current_channel_id = current_channel_id yield SenderChange(current_channel_id) - if isinstance(payload, DataFrame): for field_accessor in ArrowTableTupleProvider(payload.frame): yield Tuple( @@ -109,12 +108,12 @@ def process_data_payload( elif isinstance(payload, MarkerFrame): if isinstance(payload.frame, State): yield payload.frame - if isinstance(payload.frame, StartOfUpstream): + if isinstance(payload.frame, StartOfUpstream): #StartOfInputChannel() if not self.started: - yield StartOfAny() + yield StartOfAny() #StartOfOutputPorts() self.started = True - yield StartOfUpstream() - if isinstance(payload.frame, EndOfUpstream): + yield StartOfUpstream() #StartOfInputChannel() + if isinstance(payload.frame, EndOfUpstream): #EndOfInputChannel() channel = self._channels[self._current_channel_id] channel.complete() port_id = channel.port_id @@ -126,14 +125,14 @@ def process_data_payload( ) if port_completed: - yield EndOfUpstream() + yield EndOfUpstream() #EndOfInputPort() all_ports_completed = all( map(lambda port: port.is_completed(), self._ports.values()) ) if all_ports_completed: - yield EndOfAll() + yield EndOfAll() #EndOfOutputPorts() else: raise NotImplementedError() diff --git a/core/amber/src/main/python/core/runnables/data_processor.py b/core/amber/src/main/python/core/runnables/data_processor.py index f4accb18e21..010b6902f5b 100644 --- a/core/amber/src/main/python/core/runnables/data_processor.py +++ b/core/amber/src/main/python/core/runnables/data_processor.py @@ -4,9 +4,9 @@ from threading import Event from loguru import logger - +from typing import Iterator, Optional from core.architecture.managers import Context -from core.models import Tuple, ExceptionInfo, State +from core.models import Tuple, ExceptionInfo, State, TupleLike from core.models.marker import Marker, StartOfUpstream, EndOfUpstream from core.models.table import all_output_to_tuple from core.util import Stoppable @@ -30,31 +30,31 @@ def run(self) -> None: self._running.set() self._switch_context() while self._running.is_set(): - marker = self._context.tuple_processing_manager.get_input_marker() + marker = self._context.marker_processing_manager.get_input_marker() + tuple_ = self._context.tuple_processing_manager.current_input_tuple if marker is not None: self.process_marker(marker) - else: + elif tuple_ is not None: self.process_tuple() + else: + raise RuntimeError("No marker or tuple to process.") self._switch_context() def process_marker(self, marker: Marker) -> None: try: executor = self._context.executor_manager.executor port = self._context.tuple_processing_manager.get_input_port() - - if isinstance(marker, StartOfUpstream): - self._set_output_state(executor.produce_state_on_start(port)) - elif isinstance(marker, State): - self._set_output_state(executor.process_state(marker, port)) - elif isinstance(marker, EndOfUpstream): - print("here!!") - self._set_output_state(executor.produce_state_on_finish(port)) - output_iterator = executor.on_finish(port) - for output in output_iterator: - # output could be a None, a TupleLike, or a TableLike. - for output_tuple in all_output_to_tuple(output): - self._set_output_tuple(output_tuple) - self._switch_context() + with replace_print( + self._context.worker_id, + self._context.console_message_manager.print_buf, + ): + if isinstance(marker, StartOfUpstream): + self._set_output_state(executor.produce_state_on_start(port)) + elif isinstance(marker, State): + self._set_output_state(executor.process_state(marker, port)) + elif isinstance(marker, EndOfUpstream): + self._set_output_state(executor.produce_state_on_finish(port)) + self._set_output_tuple(executor.on_finish(port)) except Exception as err: @@ -71,26 +71,13 @@ def process_tuple(self) -> None: while not finished_current.is_set(): try: executor = self._context.executor_manager.executor - tuple_ = self._context.tuple_processing_manager.current_input_tuple port = self._context.tuple_processing_manager.get_input_port() - print("here1!!", tuple_) - if isinstance(tuple_, Tuple): - output_iterator = executor.process_tuple(tuple_, port) - else: - print("here2!!", tuple_) - + tuple_ = self._context.tuple_processing_manager.get_input_tuple() with replace_print( self._context.worker_id, self._context.console_message_manager.print_buf, ): - for output in output_iterator: - # output could be a None, a TupleLike, or a TableLike. - for output_tuple in all_output_to_tuple(output): - self._set_output_tuple(output_tuple) - self._switch_context() - - # current tuple finished successfully - finished_current.set() + self._set_output_tuple(executor.process_tuple(tuple_, port)) except Exception as err: logger.exception(err) @@ -101,13 +88,18 @@ def process_tuple(self) -> None: finally: self._switch_context() - def _set_output_tuple(self, output_tuple) -> None: - if output_tuple is not None: - output_tuple.finalize(self._context.output_manager.get_port().get_schema()) - self._context.tuple_processing_manager.current_output_tuple = output_tuple + def _set_output_tuple(self, output_iterator: Iterator[Optional[TupleLike]]) -> None: + for output in output_iterator: + # output could be a None, a TupleLike, or a TableLike. + for output_tuple in all_output_to_tuple(output): + if output_tuple is not None: + output_tuple.finalize(self._context.output_manager.get_port().get_schema()) + self._context.tuple_processing_manager.current_output_tuple = output_tuple + self._switch_context() + self._context.tuple_processing_manager.finished_current.set() def _set_output_state(self, output_state: State) -> None: - self._context.tuple_processing_manager.current_output_state = output_state + self._context.marker_processing_manager.current_output_state = output_state def _switch_context(self) -> None: """ diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index 06309cd8b5b..87e3589b9d2 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -148,7 +148,7 @@ def process_control_payload( end_time - self.context.statistics_manager.worker_start_time ) - def process_input(self) -> None: + def process_input_tuple(self) -> None: """ Process the current input tuple or state with the current input link. Send all result Tuples or State to downstream workers. @@ -164,17 +164,22 @@ def process_input(self) -> None: for output_data in self.process_tuple_with_udf(): self._check_and_process_control() if output_data is not None: - if isinstance(output_data, Tuple): - self.context.statistics_manager.increase_output_tuple_count( - PortIdentity(0) - ) - for (to, batch) in self.context.output_manager.tuple_to_batch(output_data): - self._output_queue.put(DataElement(tag=to, payload=batch)) - elif isinstance(output_data, State): - for (to, batch) in self.context.output_manager.emit_marker(output_data): - self._output_queue.put(DataElement(tag=to, payload=batch)) + self.context.statistics_manager.increase_output_tuple_count( + PortIdentity(0) + ) + for (to, batch) in self.context.output_manager.tuple_to_batch(output_data): + self._output_queue.put(DataElement(tag=to, payload=batch)) + + def process_input_state(self) -> None: + self._switch_context() + output_state = self.context.marker_processing_manager.get_output_state() + self._switch_context() + if output_state is not None: + for (to, batch) in self.context.output_manager.emit_marker(output_state): + self._output_queue.put(DataElement(tag=to, payload=batch)) - def process_tuple_with_udf(self) -> Union[Iterator[Optional[Tuple]], State]: + + def process_tuple_with_udf(self) -> Iterator[Optional[Tuple]]: """ Process the Tuple/InputExhausted with the current link. @@ -189,8 +194,6 @@ def process_tuple_with_udf(self) -> Union[Iterator[Optional[Tuple]], State]: self._check_and_process_control() self._switch_context() yield self.context.tuple_processing_manager.get_output_tuple() - self._check_and_process_control() - yield self.context.tuple_processing_manager.get_output_state() def _process_control_element(self, control_element: ControlElement) -> None: """ @@ -202,23 +205,22 @@ def _process_control_element(self, control_element: ControlElement) -> None: def _process_tuple(self, tuple_: Tuple) -> None: self.context.tuple_processing_manager.current_input_tuple = tuple_ - self.process_input() + self.process_input_tuple() self._check_and_process_control() def _process_state(self, state_: State): - self.context.tuple_processing_manager.current_input_marker = state_ + self.context.marker_processing_manager.current_input_marker = state_ self._check_and_process_control() self._switch_context() def _process_start_of_upstream(self, start_of_upstream: StartOfUpstream): - self.context.tuple_processing_manager.current_input_marker = start_of_upstream - self.process_input() - self._switch_context() + self.context.marker_processing_manager.current_input_marker = start_of_upstream + self.process_input_state() def _process_end_of_upstream(self, end_of_upstream: EndOfUpstream): - self.context.tuple_processing_manager.current_input_marker = end_of_upstream - self._check_and_process_control() - self._switch_context() + self.context.marker_processing_manager.current_input_marker = end_of_upstream + self.process_input_state() + self.process_input_tuple() if self.context.tuple_processing_manager.current_input_port_id is not None: control_command = set_one_of( ControlCommandV2, @@ -287,13 +289,13 @@ def _process_data_element(self, data_element: DataElement) -> None: if self.context.state_manager.confirm_state(WorkerState.READY): self.context.state_manager.transit_to(WorkerState.RUNNING) - self.context.tuple_processing_manager.current_input_iter = ( + self.context.tuple_processing_manager.current_input_tuple_iter = ( self.context.input_manager.process_data_payload( data_element.tag, data_element.payload ) ) - if self.context.tuple_processing_manager.current_input_iter is None: + if self.context.tuple_processing_manager.current_input_tuple_iter is None: return # here the self.context.processing_manager.current_input_iter # could be modified during iteration, thus we are using the while := @@ -301,7 +303,7 @@ def _process_data_element(self, data_element: DataElement) -> None: # syntax sugar. while ( element := next( - self.context.tuple_processing_manager.current_input_iter, None + self.context.tuple_processing_manager.current_input_tuple_iter, None ) ) is not None: try: diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index d7885da7d6d..4e621d734c0 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -121,13 +121,13 @@ class DataProcessor( */ private[this] def processEndOfUpstream(portId: Int): Unit = { try { - outputManager.outputIterator.setTupleOutput( - executor.onFinishMultiPort(portId) - ) val outputState = executor.onFinishProduceState(portId) if (outputState.isDefined) { outputManager.emitMarker(outputState.get) } + outputManager.outputIterator.setTupleOutput( + executor.onFinishMultiPort(portId) + ) } catch safely { case e => // forward input tuple to the user and pause DP thread From 1887bfbe1127ac210fb3e0824e20afcc0cc513d4 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Sun, 15 Sep 2024 21:32:19 -0700 Subject: [PATCH 104/163] update --- .../sendsemantics/hash_based_shuffle_partitioner.py | 5 ++--- .../architecture/sendsemantics/one_to_one_partitioner.py | 6 ++---- .../architecture/sendsemantics/round_robin_partitioner.py | 2 +- core/amber/src/main/python/core/runnables/main_loop.py | 2 +- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py index 5678e0cfe5a..e76010bd1f6 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py @@ -3,10 +3,9 @@ from loguru import logger from overrides import overrides -from copy import deepcopy from core.architecture.sendsemantics.partitioner import Partitioner -from core.models import Tuple, State -from core.models.marker import EndOfUpstream, Marker +from core.models import Tuple +from core.models.marker import Marker from core.util import set_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import ( HashBasedShufflePartitioning, diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py index cd7e452609e..9281b603b0e 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py @@ -2,11 +2,9 @@ from typing import Iterator from overrides import overrides - -from copy import deepcopy from core.architecture.sendsemantics.partitioner import Partitioner -from core.models import Tuple, State -from core.models.marker import EndOfUpstream, Marker +from core.models import Tuple +from core.models.marker import Marker from core.util import set_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import ( OneToOnePartitioning, diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py index 01cbbb9a1da..efedbd2c4cc 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py @@ -3,7 +3,6 @@ from overrides import overrides -from copy import deepcopy from core.architecture.sendsemantics.partitioner import Partitioner from core.models import Tuple, State from core.models.marker import EndOfUpstream, Marker @@ -43,4 +42,5 @@ def flush( for receiver, batch in self.receivers: if len(batch) > 0: yield receiver, batch + batch.clear() yield receiver, marker diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index 87e3589b9d2..4e8358e5365 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -210,8 +210,8 @@ def _process_tuple(self, tuple_: Tuple) -> None: def _process_state(self, state_: State): self.context.marker_processing_manager.current_input_marker = state_ + self.process_input_state() self._check_and_process_control() - self._switch_context() def _process_start_of_upstream(self, start_of_upstream: StartOfUpstream): self.context.marker_processing_manager.current_input_marker = start_of_upstream From 083e8666df654fe4c6d3bd41b9fa579638c596fe Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 16 Sep 2024 04:03:56 -0700 Subject: [PATCH 105/163] update --- .../src/main/python/core/models/marker.py | 3 ++- .../pythonworker/PythonProxyServer.scala | 2 +- .../worker/promisehandlers/StartHandler.scala | 2 +- .../ics/texera/workflow/common/Marker.scala | 24 +++++++++---------- .../operators/state/DataToStateOpExec.scala | 2 +- 5 files changed, 16 insertions(+), 17 deletions(-) diff --git a/core/amber/src/main/python/core/models/marker.py b/core/amber/src/main/python/core/models/marker.py index 7a679802624..1c92c803eda 100644 --- a/core/amber/src/main/python/core/models/marker.py +++ b/core/amber/src/main/python/core/models/marker.py @@ -20,10 +20,11 @@ class EndOfUpstream(Marker): @dataclass class State(Marker): - def __init__(self, table: Optional[Table] = None): + def __init__(self, table: Optional[Table] = None, pass_to_all_downstream: bool = False): if table is None: self.data = {} self.schema = Schema() + self.add("passToAllDownstream", pass_to_all_downstream) else: self.data = table.to_pandas().iloc[0].to_dict() self.schema = Schema(table.schema) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala index ad7574518d0..081af0dd4bd 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala @@ -111,7 +111,7 @@ private class AmberProducer( outputPort.sendTo(to, MarkerFrame(EndOfUpstream())) } else if (dataHeader.payloadType == State().getClass.getSimpleName) { assert(root.getRowCount == 1) - outputPort.sendTo(to, MarkerFrame(State().fromTuple(ArrowUtils.getTexeraTuple(0, root)))) + outputPort.sendTo(to, MarkerFrame(State(Some(ArrowUtils.getTexeraTuple(0, root))))) } else { // normal data batches val queue = mutable.Queue[Tuple]() diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala index cf40d028a13..6d8020d1209 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala @@ -11,7 +11,7 @@ import edu.uci.ics.amber.engine.common.rpc.AsyncRPCServer.ControlCommand import edu.uci.ics.amber.engine.common.virtualidentity.ChannelIdentity import edu.uci.ics.amber.engine.common.virtualidentity.util.SOURCE_STARTER_ACTOR import edu.uci.ics.amber.engine.common.workflow.PortIdentity -import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfUpstream, State} +import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfUpstream} object StartHandler { final case class StartWorker() extends ControlCommand[WorkerState] diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index 1889f2eec3c..f4792126812 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -9,13 +9,19 @@ sealed trait Marker final case class StartOfUpstream() extends Marker final case class EndOfUpstream() extends Marker -final case class State() extends Marker { +final case class State(tuple: Option[Tuple] = None, passToAllDownstream: Boolean = false) extends Marker { val list: mutable.Map[String, (AttributeType, Any)] = mutable.HashMap() - - def add(key: String, value: Any, valueType: AttributeType): Unit = { - list.put(key, (valueType, value)) + if (tuple.isEmpty) { + add("passToAllDownstream", passToAllDownstream, AttributeType.BOOLEAN) + } + else { + tuple.get.getSchema.getAttributes.foreach { attribute => + add(attribute.getName, tuple.get.getField(attribute.getName), attribute.getType) + } } + def add(key: String, value: Any, valueType: AttributeType): Unit = list.put(key, (valueType, value)) + def get(key: String): Any = list(key)._2 def apply(key: String): Any = get(key) @@ -34,15 +40,7 @@ final case class State() extends Marker { .addSequentially(list.values.map(_._2).toArray) .build() - def fromTuple(tuple: Tuple): State = { - tuple.getSchema.getAttributes.foreach { attribute => - add(attribute.getName, tuple.getField(attribute.getName), attribute.getType) - } - this - } - def size: Int = list.size - override def toString: String = - list.map { case (key, (_, value)) => s"$key: $value" }.mkString(", ") + override def toString: String = list.map { case (key, (_, value)) => s"$key: $value" }.mkString(", ") } \ No newline at end of file diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala index a4423ec99ae..9ec6fb1e082 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala @@ -22,7 +22,7 @@ class DataToStateOpExec extends OperatorExecutor { Iterator() } - override def onFinishProduceState(port: Int): Option[State] = Some(State().fromTuple(stateTuple)) + override def onFinishProduceState(port: Int): Option[State] = Some(State(Some(stateTuple))) override def onFinish(port: Int): Iterator[TupleLike] = buffer.iterator } From 87e305a2042b89b50d003695dd6fd93717a507ec Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 16 Sep 2024 04:18:06 -0700 Subject: [PATCH 106/163] update --- core/amber/src/main/python/core/models/marker.py | 3 +++ core/amber/src/main/python/core/models/operator.py | 7 ++++--- .../workflow/operators/state/StateToDataOpDesc.scala | 7 +++++-- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/core/amber/src/main/python/core/models/marker.py b/core/amber/src/main/python/core/models/marker.py index 1c92c803eda..f018f12c571 100644 --- a/core/amber/src/main/python/core/models/marker.py +++ b/core/amber/src/main/python/core/models/marker.py @@ -39,6 +39,9 @@ def add(self, key: str, value: any, value_type: Optional[AttributeType] = None) def get(self, key: str) -> any: return self.data[key] + def pass_to_all_downstream(self) -> bool: + return self.data["passToAllDownstream"] + def to_table(self) -> Table: return Table.from_pandas(df=DataFrame([self.data]), schema=self.schema.as_arrow_schema(),) diff --git a/core/amber/src/main/python/core/models/operator.py b/core/amber/src/main/python/core/models/operator.py index 5ffd3b7c119..a511b39f738 100644 --- a/core/amber/src/main/python/core/models/operator.py +++ b/core/amber/src/main/python/core/models/operator.py @@ -48,14 +48,15 @@ def close(self) -> None: """ pass - def process_state(self, state_: State, port: int) -> State: + def process_state(self, state: State, port: int) -> Optional[State]: """ Process an input State from the given link. - :param state_: State, a State from an input port to be processed. + :param state: State, a State from an input port to be processed. :param port: int, input port index of the current exhausted port. :return: State, producing one State object """ - return state_ + if state.pass_to_all_downstream(): + return state def produce_state_on_start(self, port: int) -> State: """ diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala index 053dafe92c1..b2fd3762a68 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala @@ -8,7 +8,7 @@ import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, Workf import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort, PortIdentity} import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} import edu.uci.ics.texera.workflow.common.operators.LogicalOp -import edu.uci.ics.texera.workflow.common.tuple.schema.{Attribute, Schema} +import edu.uci.ics.texera.workflow.common.tuple.schema.{Attribute, AttributeType, Schema} class StateToDataOpDesc extends LogicalOp { @JsonProperty @@ -57,6 +57,9 @@ class StateToDataOpDesc extends LogicalOp { override def getOutputSchema(schemas: Array[Schema]): Schema = throw new NotImplementedError() override def getOutputSchemas(schemas: Array[Schema]): Array[Schema] = - Array(Schema.builder().add(outputColumns).build(), schemas(0)) + Array( + Schema.builder().add(outputColumns).add("passToAllDownstream", AttributeType.BOOLEAN).build(), + schemas(0) + ) } From 59aa9a9276edc27f441ed8d444e45f466d35b5b3 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 16 Sep 2024 04:30:26 -0700 Subject: [PATCH 107/163] update --- core/amber/src/main/python/core/models/marker.py | 2 +- .../amber/src/main/python/core/models/operator.py | 2 +- .../architecture/worker/DataProcessor.scala | 15 ++++++++++++--- .../uci/ics/texera/workflow/common/Marker.scala | 2 ++ .../common/operators/OperatorExecutor.scala | 15 +++++++++++---- .../operators/state/DataToStateOpExec.scala | 2 +- .../state/DualInputStateReceiverOpExec.scala | 10 +++++----- .../operators/state/ProduceStateOpExec.scala | 2 +- .../operators/state/StateReceiverOpExec.scala | 4 ++-- .../operators/state/StateToDataOpExec.scala | 7 ++++--- 10 files changed, 40 insertions(+), 21 deletions(-) diff --git a/core/amber/src/main/python/core/models/marker.py b/core/amber/src/main/python/core/models/marker.py index f018f12c571..52e030a359b 100644 --- a/core/amber/src/main/python/core/models/marker.py +++ b/core/amber/src/main/python/core/models/marker.py @@ -39,7 +39,7 @@ def add(self, key: str, value: any, value_type: Optional[AttributeType] = None) def get(self, key: str) -> any: return self.data[key] - def pass_to_all_downstream(self) -> bool: + def is_pass_to_all_downstream(self) -> bool: return self.data["passToAllDownstream"] def to_table(self) -> Table: diff --git a/core/amber/src/main/python/core/models/operator.py b/core/amber/src/main/python/core/models/operator.py index a511b39f738..98e146eb8ce 100644 --- a/core/amber/src/main/python/core/models/operator.py +++ b/core/amber/src/main/python/core/models/operator.py @@ -55,7 +55,7 @@ def process_state(self, state: State, port: int) -> Optional[State]: :param port: int, input port index of the current exhausted port. :return: State, producing one State object """ - if state.pass_to_all_downstream(): + if state.is_pass_to_all_downstream(): return state def produce_state_on_start(self, port: int) -> State: diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index 4e621d734c0..fc4f2002e18 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -94,7 +94,16 @@ class DataProcessor( } private[this] def processInputState(state: State, port: Int): Unit = { - outputManager.emitMarker(executor.processState(state, port)) + try { + val outputState = executor.processState(state, port) + if (outputState.isDefined) { + outputManager.emitMarker(outputState.get) + } + } catch safely { + case e => + // forward input tuple to the user and pause DP thread + handleExecutorException(e) + } } /** @@ -104,7 +113,7 @@ class DataProcessor( private[this] def processStartOfUpstream(portId: Int): Unit = { try { outputManager.emitMarker(StartOfUpstream()) - val outputState = executor.onStartProduceState(portId) + val outputState = executor.produceStateOnStart(portId) if (outputState.isDefined) { outputManager.emitMarker(outputState.get) } @@ -121,7 +130,7 @@ class DataProcessor( */ private[this] def processEndOfUpstream(portId: Int): Unit = { try { - val outputState = executor.onFinishProduceState(portId) + val outputState = executor.produceStateOnFinish(portId) if (outputState.isDefined) { outputManager.emitMarker(outputState.get) } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index f4792126812..5a07e023f72 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -24,6 +24,8 @@ final case class State(tuple: Option[Tuple] = None, passToAllDownstream: Boolean def get(key: String): Any = list(key)._2 + def isPassToAllDownstream: Boolean = get("passToAllDownstream").asInstanceOf[Boolean] + def apply(key: String): Any = get(key) def toTuple: Tuple = diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala index 9c80fac4e17..2f2f86a75bb 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala @@ -9,9 +9,16 @@ trait OperatorExecutor { def open(): Unit = {} - def onStartProduceState(port: Int): Option[State] = None - - def processState(state: State, port: Int): State = state + def produceStateOnStart(port: Int): Option[State] = None + + def processState(state: State, port: Int): Option[State] = { + if (state.isPassToAllDownstream) { + Some(state) + } + else { + None + } + } def processTupleMultiPort( tuple: Tuple, @@ -22,7 +29,7 @@ trait OperatorExecutor { def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] - def onFinishProduceState(port: Int): Option[State] = None + def produceStateOnFinish(port: Int): Option[State] = None def onFinishMultiPort(port: Int): Iterator[(TupleLike, Option[PortIdentity])] = { onFinish(port).map(t => (t, None)) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala index 9ec6fb1e082..ac2758d3b4f 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala @@ -22,7 +22,7 @@ class DataToStateOpExec extends OperatorExecutor { Iterator() } - override def onFinishProduceState(port: Int): Option[State] = Some(State(Some(stateTuple))) + override def produceStateOnFinish(port: Int): Option[State] = Some(State(Some(stateTuple))) override def onFinish(port: Int): Iterator[TupleLike] = buffer.iterator } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpExec.scala index ea6ad205f4b..2d2c66c9124 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpExec.scala @@ -13,14 +13,14 @@ class DualInputStateReceiverOpExec extends OperatorExecutor { } override def processTupleMultiPort( - tuple: Tuple, - port: Int - ): Iterator[(TupleLike, Option[PortIdentity])] = { + tuple: Tuple, + port: Int + ): Iterator[(TupleLike, Option[PortIdentity])] = { processTuple(tuple, port).map(t => (t, None)) } - override def processState(state: State, port: Int): State = { + override def processState(state: State, port: Int): Option[State] = { println(port, state) - state + Some(state) } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpExec.scala index d86c993619e..76b153dab45 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpExec.scala @@ -8,7 +8,7 @@ import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType class ProduceStateOpExec extends OperatorExecutor { - override def onStartProduceState(port: Int): Option[State] = { + override def produceStateOnStart(port: Int): Option[State] = { val state = State() state.add("i", 1, AttributeType.INTEGER) Some(state) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpExec.scala index 41af2291e6a..3ccb94385f9 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpExec.scala @@ -11,8 +11,8 @@ class StateReceiverOpExec extends OperatorExecutor { Iterator(tuple) } - override def processState(state: State, port: Int): State = { + override def processState(state: State, port: Int): Option[State] = { println(state) - state + Some(state) } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala index 692309a9468..e343afb6d2c 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala @@ -9,10 +9,10 @@ import edu.uci.ics.texera.workflow.common.tuple.Tuple class StateToDataOpExec extends OperatorExecutor { private var stateTuple: Tuple = _ - override def processState(state: State, port: Int): State = { + override def processState(state: State, port: Int): Option[State] = { if (state.size > 0) stateTuple = state.toTuple - State() + Some(State()) } override def processTupleMultiPort( @@ -28,5 +28,6 @@ class StateToDataOpExec extends OperatorExecutor { } } - override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = throw new NotImplementedError() + override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = + throw new NotImplementedError() } From d603c4184e5631aee07d67657876e0b1c8c092e7 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 16 Sep 2024 18:49:59 -0700 Subject: [PATCH 108/163] update --- .../operators/state/DataToStateOpDesc.scala | 46 ------------- .../operators/state/DataToStateOpExec.scala | 28 -------- .../state/DualInputStateReceiverOpDesc.scala | 42 ------------ .../state/DualInputStateReceiverOpExec.scala | 26 -------- .../operators/state/ProduceStateOpDesc.scala | 42 ------------ .../operators/state/ProduceStateOpExec.scala | 21 ------ .../operators/state/StateReceiverOpDesc.scala | 42 ------------ .../operators/state/StateReceiverOpExec.scala | 18 ----- .../operators/state/StateToDataOpDesc.scala | 65 ------------------- .../operators/state/StateToDataOpExec.scala | 33 ---------- 10 files changed, 363 deletions(-) delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpDesc.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpExec.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpDesc.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpExec.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpDesc.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpExec.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala delete mode 100644 core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala deleted file mode 100644 index c8733f410ef..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpDesc.scala +++ /dev/null @@ -1,46 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import edu.uci.ics.amber.engine.architecture.deploysemantics.{PhysicalOp, SchemaPropagationFunc} -import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo -import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} -import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort, PortIdentity} -import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} -import edu.uci.ics.texera.workflow.common.operators.LogicalOp -import edu.uci.ics.texera.workflow.common.tuple.schema.Schema - -class DataToStateOpDesc extends LogicalOp { - override def getPhysicalOp( - workflowId: WorkflowIdentity, - executionId: ExecutionIdentity - ): PhysicalOp = { - PhysicalOp - .oneToOnePhysicalOp( - workflowId, - executionId, - operatorIdentifier, - OpExecInitInfo((_, _) => { - new DataToStateOpExec() - }) - ) - .withInputPorts(operatorInfo.inputPorts) - .withOutputPorts(operatorInfo.outputPorts) - .withPropagateSchema( - SchemaPropagationFunc(inputSchemas => Map(PortIdentity() -> inputSchemas(PortIdentity(1)))) - ) - .withSuggestedWorkerNum(1) - } - - override def operatorInfo: OperatorInfo = - OperatorInfo( - "Data To State", - "Convert Data to State", - OperatorGroupConstants.UTILITY_GROUP, - inputPorts = List( - InputPort(PortIdentity(), "State"), - InputPort(PortIdentity(1), "Data", dependencies = List(PortIdentity())) - ), - outputPorts = List(OutputPort()) - ) - - override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(1) -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala deleted file mode 100644 index ac2758d3b4f..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DataToStateOpExec.scala +++ /dev/null @@ -1,28 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike -import edu.uci.ics.texera.workflow.common.State -import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor -import edu.uci.ics.texera.workflow.common.tuple.Tuple - -import scala.collection.mutable - -class DataToStateOpExec extends OperatorExecutor { - private val buffer = new mutable.ArrayBuffer[Tuple]() - private var stateTuple: Tuple = _ - - override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { - port match { - case 0 => - if (stateTuple == null) - stateTuple = tuple - case 1 => - buffer += tuple - } - Iterator() - } - - override def produceStateOnFinish(port: Int): Option[State] = Some(State(Some(stateTuple))) - - override def onFinish(port: Int): Iterator[TupleLike] = buffer.iterator -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpDesc.scala deleted file mode 100644 index dfa5bc434a8..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpDesc.scala +++ /dev/null @@ -1,42 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp -import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo -import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} -import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort, PortIdentity} -import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} -import edu.uci.ics.texera.workflow.common.operators.LogicalOp -import edu.uci.ics.texera.workflow.common.tuple.schema.Schema - -class DualInputStateReceiverOpDesc extends LogicalOp { - - override def getPhysicalOp( - workflowId: WorkflowIdentity, - executionId: ExecutionIdentity - ): PhysicalOp = { - PhysicalOp - .oneToOnePhysicalOp( - workflowId, - executionId, - operatorIdentifier, - OpExecInitInfo((_, _) => { - new DualInputStateReceiverOpExec() - }) - ) - .withInputPorts(operatorInfo.inputPorts) - .withOutputPorts(operatorInfo.outputPorts) - .withParallelizable(false) - } - - override def operatorInfo: OperatorInfo = - OperatorInfo( - "2 in Testing State Receiver", - "", - OperatorGroupConstants.UTILITY_GROUP, - inputPorts = List(InputPort(PortIdentity(0)), InputPort(PortIdentity(1))), - outputPorts = List(OutputPort()), - supportReconfiguration = true - ) - - override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(0) -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpExec.scala deleted file mode 100644 index 2d2c66c9124..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/DualInputStateReceiverOpExec.scala +++ /dev/null @@ -1,26 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike -import edu.uci.ics.amber.engine.common.workflow.PortIdentity -import edu.uci.ics.texera.workflow.common.State -import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor -import edu.uci.ics.texera.workflow.common.tuple.Tuple - -class DualInputStateReceiverOpExec extends OperatorExecutor { - - override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { - Iterator(tuple) - } - - override def processTupleMultiPort( - tuple: Tuple, - port: Int - ): Iterator[(TupleLike, Option[PortIdentity])] = { - processTuple(tuple, port).map(t => (t, None)) - } - - override def processState(state: State, port: Int): Option[State] = { - println(port, state) - Some(state) - } -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpDesc.scala deleted file mode 100644 index d282497524b..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpDesc.scala +++ /dev/null @@ -1,42 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp -import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo -import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} -import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort} -import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} -import edu.uci.ics.texera.workflow.common.operators.LogicalOp -import edu.uci.ics.texera.workflow.common.tuple.schema.Schema - -class ProduceStateOpDesc extends LogicalOp { - - override def getPhysicalOp( - workflowId: WorkflowIdentity, - executionId: ExecutionIdentity - ): PhysicalOp = { - PhysicalOp - .oneToOnePhysicalOp( - workflowId, - executionId, - operatorIdentifier, - OpExecInitInfo((_, _) => { - new ProduceStateOpExec() - }) - ) - .withInputPorts(operatorInfo.inputPorts) - .withOutputPorts(operatorInfo.outputPorts) - .withParallelizable(false) - } - - override def operatorInfo: OperatorInfo = - OperatorInfo( - "Produce State", - "", - OperatorGroupConstants.UTILITY_GROUP, - inputPorts = List(InputPort()), - outputPorts = List(OutputPort()), - supportReconfiguration = true - ) - - override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(0) -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpExec.scala deleted file mode 100644 index 76b153dab45..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/ProduceStateOpExec.scala +++ /dev/null @@ -1,21 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike -import edu.uci.ics.texera.workflow.common.State -import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor -import edu.uci.ics.texera.workflow.common.tuple.Tuple -import edu.uci.ics.texera.workflow.common.tuple.schema.AttributeType - -class ProduceStateOpExec extends OperatorExecutor { - - override def produceStateOnStart(port: Int): Option[State] = { - val state = State() - state.add("i", 1, AttributeType.INTEGER) - Some(state) - } - - override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { - Iterator(tuple) - } - -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpDesc.scala deleted file mode 100644 index 346084072fc..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpDesc.scala +++ /dev/null @@ -1,42 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import edu.uci.ics.amber.engine.architecture.deploysemantics.PhysicalOp -import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo -import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} -import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort} -import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} -import edu.uci.ics.texera.workflow.common.operators.LogicalOp -import edu.uci.ics.texera.workflow.common.tuple.schema.Schema - -class StateReceiverOpDesc extends LogicalOp { - - override def getPhysicalOp( - workflowId: WorkflowIdentity, - executionId: ExecutionIdentity - ): PhysicalOp = { - PhysicalOp - .oneToOnePhysicalOp( - workflowId, - executionId, - operatorIdentifier, - OpExecInitInfo((_, _) => { - new StateReceiverOpExec() - }) - ) - .withInputPorts(operatorInfo.inputPorts) - .withOutputPorts(operatorInfo.outputPorts) - .withParallelizable(false) - } - - override def operatorInfo: OperatorInfo = - OperatorInfo( - "Testing State Receiver", - "", - OperatorGroupConstants.UTILITY_GROUP, - inputPorts = List(InputPort()), - outputPorts = List(OutputPort()), - supportReconfiguration = true - ) - - override def getOutputSchema(schemas: Array[Schema]): Schema = schemas(0) -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpExec.scala deleted file mode 100644 index 3ccb94385f9..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateReceiverOpExec.scala +++ /dev/null @@ -1,18 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike -import edu.uci.ics.texera.workflow.common.State -import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor -import edu.uci.ics.texera.workflow.common.tuple.Tuple - -class StateReceiverOpExec extends OperatorExecutor { - - override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = { - Iterator(tuple) - } - - override def processState(state: State, port: Int): Option[State] = { - println(state) - Some(state) - } -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala deleted file mode 100644 index b2fd3762a68..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpDesc.scala +++ /dev/null @@ -1,65 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription} -import com.kjetland.jackson.jsonSchema.annotations.JsonSchemaTitle -import edu.uci.ics.amber.engine.architecture.deploysemantics.{PhysicalOp, SchemaPropagationFunc} -import edu.uci.ics.amber.engine.architecture.deploysemantics.layer.OpExecInitInfo -import edu.uci.ics.amber.engine.common.virtualidentity.{ExecutionIdentity, WorkflowIdentity} -import edu.uci.ics.amber.engine.common.workflow.{InputPort, OutputPort, PortIdentity} -import edu.uci.ics.texera.workflow.common.metadata.{OperatorGroupConstants, OperatorInfo} -import edu.uci.ics.texera.workflow.common.operators.LogicalOp -import edu.uci.ics.texera.workflow.common.tuple.schema.{Attribute, AttributeType, Schema} - -class StateToDataOpDesc extends LogicalOp { - @JsonProperty - @JsonSchemaTitle("State output column(s)") - @JsonPropertyDescription( - "Name of the newly added output columns that the UDF will produce, if any" - ) - var outputColumns: List[Attribute] = List() - - override def getPhysicalOp( - workflowId: WorkflowIdentity, - executionId: ExecutionIdentity - ): PhysicalOp = { - PhysicalOp - .oneToOnePhysicalOp( - workflowId, - executionId, - operatorIdentifier, - OpExecInitInfo((_, _) => { - new StateToDataOpExec() - }) - ) - .withInputPorts(operatorInfo.inputPorts) - .withOutputPorts(operatorInfo.outputPorts) - .withPropagateSchema( - SchemaPropagationFunc(inputSchemas => - getOutputSchemas( - operatorInfo.inputPorts.map(port => inputSchemas(port.id)).toArray - ).zipWithIndex.map { - case (schema, index) => PortIdentity(index) -> schema - }.toMap - ) - ) - .withSuggestedWorkerNum(1) - } - - override def operatorInfo: OperatorInfo = - OperatorInfo( - "State To Data", - "Convert State to Data", - OperatorGroupConstants.UTILITY_GROUP, - inputPorts = List(InputPort()), - outputPorts = List(OutputPort(PortIdentity(), "State"), OutputPort(PortIdentity(1), "Data")) - ) - - override def getOutputSchema(schemas: Array[Schema]): Schema = throw new NotImplementedError() - - override def getOutputSchemas(schemas: Array[Schema]): Array[Schema] = - Array( - Schema.builder().add(outputColumns).add("passToAllDownstream", AttributeType.BOOLEAN).build(), - schemas(0) - ) - -} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala deleted file mode 100644 index e343afb6d2c..00000000000 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/operators/state/StateToDataOpExec.scala +++ /dev/null @@ -1,33 +0,0 @@ -package edu.uci.ics.texera.workflow.operators.state - -import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike -import edu.uci.ics.amber.engine.common.workflow.PortIdentity -import edu.uci.ics.texera.workflow.common.State -import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor -import edu.uci.ics.texera.workflow.common.tuple.Tuple - -class StateToDataOpExec extends OperatorExecutor { - private var stateTuple: Tuple = _ - - override def processState(state: State, port: Int): Option[State] = { - if (state.size > 0) - stateTuple = state.toTuple - Some(State()) - } - - override def processTupleMultiPort( - tuple: Tuple, - port: Int - ): Iterator[(TupleLike, Option[PortIdentity])] = { - if (stateTuple != null) { - val outputTuple = stateTuple - stateTuple = null - Array((outputTuple, Some(PortIdentity())), (tuple, Some(PortIdentity(1)))).iterator - } else { - Iterator((tuple, Some(PortIdentity(1)))) - } - } - - override def processTuple(tuple: Tuple, port: Int): Iterator[TupleLike] = - throw new NotImplementedError() -} From 30be7db74aeff179410d639850a7d0052c8ba7d9 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 16 Sep 2024 18:52:57 -0700 Subject: [PATCH 109/163] fix format --- .../texera/workflow/common/operators/LogicalOp.scala | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala index b858aa7bdc4..b3c23f5f2bc 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala @@ -140,13 +140,6 @@ import edu.uci.ics.texera.workflow.operators.visualization.ternaryPlot.TernaryPl import org.apache.commons.lang3.builder.{EqualsBuilder, HashCodeBuilder, ToStringBuilder} import org.apache.zookeeper.KeeperException.UnimplementedException import edu.uci.ics.texera.workflow.operators.machineLearning.Scorer.MachineLearningScorerOpDesc -import edu.uci.ics.texera.workflow.operators.state.{ - DataToStateOpDesc, - DualInputStateReceiverOpDesc, - ProduceStateOpDesc, - StateReceiverOpDesc, - StateToDataOpDesc -} import edu.uci.ics.texera.workflow.operators.visualization.quiverPlot.QuiverPlotOpDesc import edu.uci.ics.texera.workflow.operators.visualization.contourPlot.ContourPlotOpDesc import edu.uci.ics.texera.workflow.operators.visualization.figureFactoryTable.FigureFactoryTableOpDesc @@ -209,11 +202,6 @@ trait StateTransferFunc new Type(value = classOf[AsterixDBSourceOpDesc], name = "AsterixDBSource"), new Type(value = classOf[TypeCastingOpDesc], name = "TypeCasting"), new Type(value = classOf[LimitOpDesc], name = "Limit"), - new Type(value = classOf[DataToStateOpDesc], name = "DataToState"), - new Type(value = classOf[StateToDataOpDesc], name = "StateToData"), - new Type(value = classOf[StateReceiverOpDesc], name = "TestingStateReceiver"), - new Type(value = classOf[DualInputStateReceiverOpDesc], name = "DualInputTestingStateReceiver"), - new Type(value = classOf[ProduceStateOpDesc], name = "ProduceStateOpDesc"), new Type(value = classOf[RandomKSamplingOpDesc], name = "RandomKSampling"), new Type(value = classOf[ReservoirSamplingOpDesc], name = "ReservoirSampling"), new Type(value = classOf[HashJoinOpDesc[String]], name = "HashJoin"), From 4cc7e76c128a481620c02ccbeec795ec0fa1fc61 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 16 Sep 2024 18:53:56 -0700 Subject: [PATCH 110/163] fix format --- .../pythonworker/PythonProxyClient.scala | 7 ++++--- .../uci/ics/texera/workflow/common/Marker.scala | 14 ++++++++------ .../common/operators/OperatorExecutor.scala | 3 +-- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala index 935085a1c2c..a486a377430 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyClient.scala @@ -17,7 +17,7 @@ import edu.uci.ics.amber.engine.common.ambermessage.InvocationConvertUtils.{ import edu.uci.ics.amber.engine.common.ambermessage.{PythonControlMessage, _} import edu.uci.ics.amber.engine.common.rpc.AsyncRPCClient.{ControlInvocation, ReturnInvocation} import edu.uci.ics.amber.engine.common.virtualidentity.ActorVirtualIdentity -import edu.uci.ics.texera.workflow.common.{EndOfUpstream, State} +import edu.uci.ics.texera.workflow.common.State import edu.uci.ics.texera.workflow.common.tuple.Tuple import edu.uci.ics.texera.workflow.common.tuple.schema.Schema import org.apache.arrow.flight._ @@ -105,8 +105,9 @@ class PythonProxyClient(portNumberPromise: Promise[Int], val actorId: ActorVirtu case DataFrame(frame) => writeArrowStream(mutable.Queue(frame: _*), from, "Data") case MarkerFrame(marker) => marker match { - case state: State => writeArrowStream(mutable.Queue(state.toTuple), from, marker.getClass.getSimpleName) - case _ => writeArrowStream(mutable.Queue.empty, from, marker.getClass.getSimpleName) + case state: State => + writeArrowStream(mutable.Queue(state.toTuple), from, marker.getClass.getSimpleName) + case _ => writeArrowStream(mutable.Queue.empty, from, marker.getClass.getSimpleName) } } } diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index 5a07e023f72..a652746b336 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -9,18 +9,19 @@ sealed trait Marker final case class StartOfUpstream() extends Marker final case class EndOfUpstream() extends Marker -final case class State(tuple: Option[Tuple] = None, passToAllDownstream: Boolean = false) extends Marker { +final case class State(tuple: Option[Tuple] = None, passToAllDownstream: Boolean = false) + extends Marker { val list: mutable.Map[String, (AttributeType, Any)] = mutable.HashMap() if (tuple.isEmpty) { add("passToAllDownstream", passToAllDownstream, AttributeType.BOOLEAN) - } - else { + } else { tuple.get.getSchema.getAttributes.foreach { attribute => add(attribute.getName, tuple.get.getField(attribute.getName), attribute.getType) } } - def add(key: String, value: Any, valueType: AttributeType): Unit = list.put(key, (valueType, value)) + def add(key: String, value: Any, valueType: AttributeType): Unit = + list.put(key, (valueType, value)) def get(key: String): Any = list(key)._2 @@ -44,5 +45,6 @@ final case class State(tuple: Option[Tuple] = None, passToAllDownstream: Boolean def size: Int = list.size - override def toString: String = list.map { case (key, (_, value)) => s"$key: $value" }.mkString(", ") -} \ No newline at end of file + override def toString: String = + list.map { case (key, (_, value)) => s"$key: $value" }.mkString(", ") +} diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala index 2f2f86a75bb..327e3a1adb9 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/OperatorExecutor.scala @@ -14,8 +14,7 @@ trait OperatorExecutor { def processState(state: State, port: Int): Option[State] = { if (state.isPassToAllDownstream) { Some(state) - } - else { + } else { None } } From dbe435d5b1687c519e249c1237395292fc92f12b Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 16 Sep 2024 19:53:32 -0700 Subject: [PATCH 111/163] fix format --- .../managers/marker_processing_manager.py | 3 ++- .../architecture/packaging/input_manager.py | 19 ++++++++++++------- .../architecture/packaging/output_manager.py | 4 ++-- .../sendsemantics/broad_cast_partitioner.py | 6 ++++-- .../hash_based_shuffle_partitioner.py | 8 +++++--- .../sendsemantics/one_to_one_partitioner.py | 6 ++++-- .../architecture/sendsemantics/partitioner.py | 6 ++++-- .../range_based_shuffle_partitioner.py | 8 +++++--- .../sendsemantics/round_robin_partitioner.py | 6 ++++-- .../src/main/python/core/models/__init__.py | 2 +- .../python/core/models/internal_marker.py | 2 ++ .../src/main/python/core/models/marker.py | 18 ++++++++++++++---- .../src/main/python/core/models/operator.py | 2 +- .../core/models/schema/attribute_type.py | 1 - .../python/core/runnables/data_processor.py | 17 ++++++++++------- .../main/python/core/runnables/main_loop.py | 9 ++++----- .../python/core/runnables/network_sender.py | 7 +++++-- .../amber/src/main/python/pyamber/__init__.py | 4 ++-- 18 files changed, 81 insertions(+), 47 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/managers/marker_processing_manager.py b/core/amber/src/main/python/core/architecture/managers/marker_processing_manager.py index 80ceb54a0f0..6a865544360 100644 --- a/core/amber/src/main/python/core/architecture/managers/marker_processing_manager.py +++ b/core/amber/src/main/python/core/architecture/managers/marker_processing_manager.py @@ -1,6 +1,7 @@ from typing import Optional from core.models.marker import State, Marker + class MarkerProcessingManager: def __init__(self): self.current_input_marker: Optional[Marker] = None @@ -12,4 +13,4 @@ def get_input_marker(self) -> Optional[State]: def get_output_state(self) -> Optional[State]: ret, self.current_output_state = self.current_output_state, None - return ret \ No newline at end of file + return ret diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index 564fca4e549..87ce2f75039 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -1,7 +1,12 @@ from typing import Iterator, Optional, Union, Dict, List from core.models import Tuple, ArrowTableTupleProvider, Schema -from core.models.internal_marker import EndOfAll, InternalMarker, SenderChange, StartOfAny +from core.models.internal_marker import ( + EndOfAll, + InternalMarker, + SenderChange, + StartOfAny, +) from core.models.marker import EndOfUpstream, State, StartOfUpstream from core.models.payload import DataFrame, DataPayload, MarkerFrame from proto.edu.uci.ics.amber.engine.common import ( @@ -108,12 +113,12 @@ def process_data_payload( elif isinstance(payload, MarkerFrame): if isinstance(payload.frame, State): yield payload.frame - if isinstance(payload.frame, StartOfUpstream): #StartOfInputChannel() + if isinstance(payload.frame, StartOfUpstream): # StartOfInputChannel() if not self.started: - yield StartOfAny() #StartOfOutputPorts() + yield StartOfAny() # StartOfOutputPorts() self.started = True - yield StartOfUpstream() #StartOfInputChannel() - if isinstance(payload.frame, EndOfUpstream): #EndOfInputChannel() + yield StartOfUpstream() # StartOfInputChannel() + if isinstance(payload.frame, EndOfUpstream): # EndOfInputChannel() channel = self._channels[self._current_channel_id] channel.complete() port_id = channel.port_id @@ -125,14 +130,14 @@ def process_data_payload( ) if port_completed: - yield EndOfUpstream() #EndOfInputPort() + yield EndOfUpstream() # EndOfInputPort() all_ports_completed = all( map(lambda port: port.is_completed(), self._ports.values()) ) if all_ports_completed: - yield EndOfAll() #EndOfOutputPorts() + yield EndOfAll() # EndOfOutputPorts() else: raise NotImplementedError() diff --git a/core/amber/src/main/python/core/architecture/packaging/output_manager.py b/core/amber/src/main/python/core/architecture/packaging/output_manager.py index 42cfc925d47..e7592e0ab45 100644 --- a/core/amber/src/main/python/core/architecture/packaging/output_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/output_manager.py @@ -99,7 +99,7 @@ def tuple_to_batch( ) def emit_marker( - self, marker: Marker + self, marker: Marker ) -> Iterable[typing.Tuple[ActorVirtualIdentity, DataPayload]]: return chain( *( @@ -127,4 +127,4 @@ def tuple_to_frame(self, tuples: typing.List[Tuple]) -> DataFrame: }, schema=self.get_port().get_schema().as_arrow_schema(), ) - ) \ No newline at end of file + ) diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/broad_cast_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/broad_cast_partitioner.py index 9a4939435ad..cc0fccbe3bc 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/broad_cast_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/broad_cast_partitioner.py @@ -35,8 +35,10 @@ def add_tuple_to_batch( @overrides def flush( - self, marker: Marker - ) -> Iterator[typing.Tuple[ActorVirtualIdentity, typing.Union[Marker, typing.List[Tuple]]]]: + self, marker: Marker + ) -> Iterator[ + typing.Tuple[ActorVirtualIdentity, typing.Union[Marker, typing.List[Tuple]]] + ]: if len(self.batch) > 0: for receiver in self.receivers: yield receiver, self.batch diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py index e76010bd1f6..f4e0942768c 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/hash_based_shuffle_partitioner.py @@ -43,9 +43,11 @@ def add_tuple_to_batch( @overrides def flush( - self, marker: Marker - ) -> Iterator[typing.Tuple[ActorVirtualIdentity, typing.Union[Marker, typing.List[Tuple]]]]: + self, marker: Marker + ) -> Iterator[ + typing.Tuple[ActorVirtualIdentity, typing.Union[Marker, typing.List[Tuple]]] + ]: for receiver, batch in self.receivers: if len(batch) > 0: yield receiver, batch - yield receiver, marker \ No newline at end of file + yield receiver, marker diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py index 9281b603b0e..1758363c0cb 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/one_to_one_partitioner.py @@ -34,8 +34,10 @@ def add_tuple_to_batch( @overrides def flush( - self, marker: Marker - ) -> Iterator[typing.Tuple[ActorVirtualIdentity, typing.Union[Marker, typing.List[Tuple]]]]: + self, marker: Marker + ) -> Iterator[ + typing.Tuple[ActorVirtualIdentity, typing.Union[Marker, typing.List[Tuple]]] + ]: if len(self.batch) > 0: yield self.receiver, self.batch self.reset() diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py index d32f4f71d39..7870da8d225 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py @@ -21,8 +21,10 @@ def add_tuple_to_batch( pass def flush( - self, marker: Marker - ) -> Iterator[typing.Tuple[ActorVirtualIdentity, typing.Union[Marker, typing.List[Tuple]]]]: + self, marker: Marker + ) -> Iterator[ + typing.Tuple[ActorVirtualIdentity, typing.Union[Marker, typing.List[Tuple]]] + ]: pass def reset(self) -> None: diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/range_based_shuffle_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/range_based_shuffle_partitioner.py index 8f6b882a3a1..cba8840823f 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/range_based_shuffle_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/range_based_shuffle_partitioner.py @@ -57,9 +57,11 @@ def add_tuple_to_batch( @overrides def flush( - self, marker: Marker - ) -> Iterator[typing.Tuple[ActorVirtualIdentity, typing.Union[Marker, typing.List[Tuple]]]]: + self, marker: Marker + ) -> Iterator[ + typing.Tuple[ActorVirtualIdentity, typing.Union[Marker, typing.List[Tuple]]] + ]: for receiver, batch in self.receivers: if len(batch) > 0: yield receiver, batch - yield receiver, marker \ No newline at end of file + yield receiver, marker diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py index efedbd2c4cc..27fd5e6d07a 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py @@ -37,8 +37,10 @@ def add_tuple_to_batch( @overrides def flush( - self, marker: Marker - ) -> Iterator[typing.Tuple[ActorVirtualIdentity, typing.Union[Marker, typing.List[Tuple]]]]: + self, marker: Marker + ) -> Iterator[ + typing.Tuple[ActorVirtualIdentity, typing.Union[Marker, typing.List[Tuple]]] + ]: for receiver, batch in self.receivers: if len(batch) > 0: yield receiver, batch diff --git a/core/amber/src/main/python/core/models/__init__.py b/core/amber/src/main/python/core/models/__init__.py index c39a55ac88c..a02d12ec1a0 100644 --- a/core/amber/src/main/python/core/models/__init__.py +++ b/core/amber/src/main/python/core/models/__init__.py @@ -50,5 +50,5 @@ class ExceptionInfo(NamedTuple): "AttributeType", "Field", "Schema", - "State" + "State", ] diff --git a/core/amber/src/main/python/core/models/internal_marker.py b/core/amber/src/main/python/core/models/internal_marker.py index 551f9330b65..9e1c53b0e1c 100644 --- a/core/amber/src/main/python/core/models/internal_marker.py +++ b/core/amber/src/main/python/core/models/internal_marker.py @@ -18,10 +18,12 @@ class InternalMarker: class SenderChange(InternalMarker): channel_id: ChannelIdentity + @dataclass class StartOfAny(InternalMarker): pass + @dataclass class EndOfAll(InternalMarker): pass diff --git a/core/amber/src/main/python/core/models/marker.py b/core/amber/src/main/python/core/models/marker.py index 52e030a359b..04ec2e99de7 100644 --- a/core/amber/src/main/python/core/models/marker.py +++ b/core/amber/src/main/python/core/models/marker.py @@ -10,17 +10,22 @@ class Marker: pass + @dataclass class StartOfUpstream(Marker): pass + @dataclass class EndOfUpstream(Marker): pass + @dataclass class State(Marker): - def __init__(self, table: Optional[Table] = None, pass_to_all_downstream: bool = False): + def __init__( + self, table: Optional[Table] = None, pass_to_all_downstream: bool = False + ): if table is None: self.data = {} self.schema = Schema() @@ -29,7 +34,9 @@ def __init__(self, table: Optional[Table] = None, pass_to_all_downstream: bool = self.data = table.to_pandas().iloc[0].to_dict() self.schema = Schema(table.schema) - def add(self, key: str, value: any, value_type: Optional[AttributeType] = None) -> None: + def add( + self, key: str, value: any, value_type: Optional[AttributeType] = None + ) -> None: self.data[key] = value if value_type is not None: self.schema.add(key, value_type) @@ -43,7 +50,10 @@ def is_pass_to_all_downstream(self) -> bool: return self.data["passToAllDownstream"] def to_table(self) -> Table: - return Table.from_pandas(df=DataFrame([self.data]), schema=self.schema.as_arrow_schema(),) + return Table.from_pandas( + df=DataFrame([self.data]), + schema=self.schema.as_arrow_schema(), + ) def __setitem__(self, key: str, value: any, value_type: AttributeType) -> None: self.add(key, value, value_type) @@ -57,4 +67,4 @@ def __str__(self) -> str: ) return f"State[{content}]" - __repr__ = __str__ \ No newline at end of file + __repr__ = __str__ diff --git a/core/amber/src/main/python/core/models/operator.py b/core/amber/src/main/python/core/models/operator.py index 98e146eb8ce..efdefe60b48 100644 --- a/core/amber/src/main/python/core/models/operator.py +++ b/core/amber/src/main/python/core/models/operator.py @@ -257,4 +257,4 @@ def process_tuple( :return: Iterator[Optional[TupleLike]], producing one TupleLike object at a time, or None. """ - yield \ No newline at end of file + yield diff --git a/core/amber/src/main/python/core/models/schema/attribute_type.py b/core/amber/src/main/python/core/models/schema/attribute_type.py index 09636cae205..e62508890d7 100644 --- a/core/amber/src/main/python/core/models/schema/attribute_type.py +++ b/core/amber/src/main/python/core/models/schema/attribute_type.py @@ -75,4 +75,3 @@ class AttributeType(Enum): bytes: AttributeType.BINARY, datetime.datetime: AttributeType.TIMESTAMP, } - diff --git a/core/amber/src/main/python/core/runnables/data_processor.py b/core/amber/src/main/python/core/runnables/data_processor.py index 010b6902f5b..a1e3e5950fb 100644 --- a/core/amber/src/main/python/core/runnables/data_processor.py +++ b/core/amber/src/main/python/core/runnables/data_processor.py @@ -45,8 +45,8 @@ def process_marker(self, marker: Marker) -> None: executor = self._context.executor_manager.executor port = self._context.tuple_processing_manager.get_input_port() with replace_print( - self._context.worker_id, - self._context.console_message_manager.print_buf, + self._context.worker_id, + self._context.console_message_manager.print_buf, ): if isinstance(marker, StartOfUpstream): self._set_output_state(executor.produce_state_on_start(port)) @@ -56,7 +56,6 @@ def process_marker(self, marker: Marker) -> None: self._set_output_state(executor.produce_state_on_finish(port)) self._set_output_tuple(executor.on_finish(port)) - except Exception as err: logger.exception(err) exc_info = sys.exc_info() @@ -74,8 +73,8 @@ def process_tuple(self) -> None: port = self._context.tuple_processing_manager.get_input_port() tuple_ = self._context.tuple_processing_manager.get_input_tuple() with replace_print( - self._context.worker_id, - self._context.console_message_manager.print_buf, + self._context.worker_id, + self._context.console_message_manager.print_buf, ): self._set_output_tuple(executor.process_tuple(tuple_, port)) @@ -93,8 +92,12 @@ def _set_output_tuple(self, output_iterator: Iterator[Optional[TupleLike]]) -> N # output could be a None, a TupleLike, or a TableLike. for output_tuple in all_output_to_tuple(output): if output_tuple is not None: - output_tuple.finalize(self._context.output_manager.get_port().get_schema()) - self._context.tuple_processing_manager.current_output_tuple = output_tuple + output_tuple.finalize( + self._context.output_manager.get_port().get_schema() + ) + self._context.tuple_processing_manager.current_output_tuple = ( + output_tuple + ) self._switch_context() self._context.tuple_processing_manager.finished_current.set() diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index 4e8358e5365..8552de4f433 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -167,7 +167,9 @@ def process_input_tuple(self) -> None: self.context.statistics_manager.increase_output_tuple_count( PortIdentity(0) ) - for (to, batch) in self.context.output_manager.tuple_to_batch(output_data): + for to, batch in self.context.output_manager.tuple_to_batch( + output_data + ): self._output_queue.put(DataElement(tag=to, payload=batch)) def process_input_state(self) -> None: @@ -175,10 +177,9 @@ def process_input_state(self) -> None: output_state = self.context.marker_processing_manager.get_output_state() self._switch_context() if output_state is not None: - for (to, batch) in self.context.output_manager.emit_marker(output_state): + for to, batch in self.context.output_manager.emit_marker(output_state): self._output_queue.put(DataElement(tag=to, payload=batch)) - def process_tuple_with_udf(self) -> Iterator[Optional[Tuple]]: """ Process the Tuple/InputExhausted with the current link. @@ -244,7 +245,6 @@ def _process_sender_change_marker(self, sender_change_marker: SenderChange) -> N self.context.input_manager.get_port_id(sender_change_marker.channel_id) ) - def _process_start_of_any_marker(self, _: StartOfAny) -> None: """ Upon receipt of an StartOfAllMarker, which indicates the start of any input links, @@ -256,7 +256,6 @@ def _process_start_of_any_marker(self, _: StartOfAny) -> None: self._output_queue.put(DataElement(tag=to, payload=batch)) self._check_and_process_control() - def _process_end_of_all_marker(self, _: EndOfAll) -> None: """ Upon receipt of an EndOfAllMarker, which indicates the end of all input links, diff --git a/core/amber/src/main/python/core/runnables/network_sender.py b/core/amber/src/main/python/core/runnables/network_sender.py index faffd40bcf5..b8c458192a9 100644 --- a/core/amber/src/main/python/core/runnables/network_sender.py +++ b/core/amber/src/main/python/core/runnables/network_sender.py @@ -61,12 +61,15 @@ def _send_data(self, to: ActorVirtualIdentity, data_payload: DataPayload) -> Non data_header = PythonDataHeader( tag=to, payload_type=data_payload.frame.__class__.__name__ ) - table = data_payload.frame.to_table() if isinstance(data_payload.frame, State) else None + table = ( + data_payload.frame.to_table() + if isinstance(data_payload.frame, State) + else None + ) self._proxy_client.send_data(bytes(data_header), table) else: raise TypeError(f"Unexpected payload {data_payload}") - @logger.catch(reraise=True) def _send_control( self, to: ActorVirtualIdentity, control_payload: ControlPayloadV2 diff --git a/core/amber/src/main/python/pyamber/__init__.py b/core/amber/src/main/python/pyamber/__init__.py index ab2b9750f32..4332806f6cb 100644 --- a/core/amber/src/main/python/pyamber/__init__.py +++ b/core/amber/src/main/python/pyamber/__init__.py @@ -10,7 +10,7 @@ BatchOperator, SourceOperator, TupleOperatorV2, - State + State, ) __all__ = [ @@ -25,5 +25,5 @@ "BatchOperator", "TupleOperatorV2", "SourceOperator", - "State" + "State", ] From fdee7738d0524d48a359d04c6a01f5a677129d38 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 16 Sep 2024 19:56:07 -0700 Subject: [PATCH 112/163] fix test --- .../architecture/worker/DataProcessorSpec.scala | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala index bbbc30308ce..491d72252c5 100644 --- a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala +++ b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala @@ -12,14 +12,9 @@ import edu.uci.ics.amber.engine.common.rpc.AsyncRPCClient.ControlInvocation import edu.uci.ics.amber.engine.common.rpc.AsyncRPCServer.ControlCommand import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike import edu.uci.ics.amber.engine.common.virtualidentity.util.CONTROLLER -import edu.uci.ics.amber.engine.common.virtualidentity.{ - ActorVirtualIdentity, - ChannelIdentity, - OperatorIdentity, - PhysicalOpIdentity -} +import edu.uci.ics.amber.engine.common.virtualidentity.{ActorVirtualIdentity, ChannelIdentity, OperatorIdentity, PhysicalOpIdentity} import edu.uci.ics.amber.engine.common.workflow.PortIdentity -import edu.uci.ics.texera.workflow.common.EndOfUpstream +import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfUpstream} import edu.uci.ics.texera.workflow.common.WorkflowContext.DEFAULT_WORKFLOW_ID import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple @@ -147,6 +142,10 @@ class DataProcessorSpec extends AnyFlatSpec with MockFactory with BeforeAndAfter ChannelIdentity(CONTROLLER, testWorkerId, isControl = true), ControlInvocation(0, OpenExecutor()) ) + dp.processDataPayload( + ChannelIdentity(senderWorkerId, testWorkerId, isControl = false), + MarkerFrame(StartOfUpstream()) + ) dp.processDataPayload( ChannelIdentity(senderWorkerId, testWorkerId, isControl = false), DataFrame(tuples) From e4594a8dcbc757c00935fac7dbed7b96807c8120 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 16 Sep 2024 20:03:25 -0700 Subject: [PATCH 113/163] fix test --- .../core/architecture/packaging/input_manager.py | 10 +++++----- .../src/main/python/core/models/__init__.py | 3 +-- .../main/python/core/models/internal_marker.py | 4 ++-- .../src/main/python/core/runnables/main_loop.py | 16 ++++++++-------- 4 files changed, 16 insertions(+), 17 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index 87ce2f75039..19380e7409d 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -2,10 +2,10 @@ from core.models import Tuple, ArrowTableTupleProvider, Schema from core.models.internal_marker import ( - EndOfAll, + EndOfOutputPorts, InternalMarker, SenderChange, - StartOfAny, + StartOfOutputPorts, ) from core.models.marker import EndOfUpstream, State, StartOfUpstream from core.models.payload import DataFrame, DataPayload, MarkerFrame @@ -88,7 +88,7 @@ def process_data_payload( # special case used to yield for source op if from_ == InputManager.SOURCE_STARTER: yield EndOfUpstream() - yield EndOfAll() + yield EndOfOutputPorts() return current_channel_id = None for channel_id, channel in self._channels.items(): @@ -115,7 +115,7 @@ def process_data_payload( yield payload.frame if isinstance(payload.frame, StartOfUpstream): # StartOfInputChannel() if not self.started: - yield StartOfAny() # StartOfOutputPorts() + yield StartOfOutputPorts() self.started = True yield StartOfUpstream() # StartOfInputChannel() if isinstance(payload.frame, EndOfUpstream): # EndOfInputChannel() @@ -137,7 +137,7 @@ def process_data_payload( ) if all_ports_completed: - yield EndOfAll() # EndOfOutputPorts() + yield EndOfOutputPorts() else: raise NotImplementedError() diff --git a/core/amber/src/main/python/core/models/__init__.py b/core/amber/src/main/python/core/models/__init__.py index a02d12ec1a0..a08116fca7a 100644 --- a/core/amber/src/main/python/core/models/__init__.py +++ b/core/amber/src/main/python/core/models/__init__.py @@ -2,7 +2,7 @@ from typing import NamedTuple from .internal_queue import InternalQueue -from .internal_marker import EndOfAll, InternalMarker, SenderChange +from .internal_marker import InternalMarker, SenderChange from .tuple import Tuple, TupleLike, ArrowTableTupleProvider from .table import Table, TableLike from .batch import Batch, BatchLike @@ -27,7 +27,6 @@ class ExceptionInfo(NamedTuple): __all__ = [ "InternalQueue", - "EndOfAll", "InternalMarker", "SenderChange", "Tuple", diff --git a/core/amber/src/main/python/core/models/internal_marker.py b/core/amber/src/main/python/core/models/internal_marker.py index 9e1c53b0e1c..7063c4fdff8 100644 --- a/core/amber/src/main/python/core/models/internal_marker.py +++ b/core/amber/src/main/python/core/models/internal_marker.py @@ -20,10 +20,10 @@ class SenderChange(InternalMarker): @dataclass -class StartOfAny(InternalMarker): +class StartOfOutputPorts(InternalMarker): pass @dataclass -class EndOfAll(InternalMarker): +class EndOfOutputPorts(InternalMarker): pass diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index 8552de4f433..77899a0e44a 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -9,7 +9,7 @@ from core.architecture.managers.context import Context from core.architecture.managers.pause_manager import PauseType -from core.architecture.packaging.input_manager import EndOfAll +from core.architecture.packaging.input_manager import EndOfOutputPorts from core.architecture.rpc.async_rpc_client import AsyncRPCClient from core.architecture.rpc.async_rpc_server import AsyncRPCServer from core.models import ( @@ -17,7 +17,7 @@ SenderChange, Tuple, ) -from core.models.internal_marker import StartOfAny +from core.models.internal_marker import StartOfOutputPorts from core.models.internal_queue import DataElement, ControlElement from core.models.marker import State, EndOfUpstream, StartOfUpstream from core.runnables.data_processor import DataProcessor @@ -245,7 +245,7 @@ def _process_sender_change_marker(self, sender_change_marker: SenderChange) -> N self.context.input_manager.get_port_id(sender_change_marker.channel_id) ) - def _process_start_of_any_marker(self, _: StartOfAny) -> None: + def _process_start_of_output_ports(self, _: StartOfOutputPorts) -> None: """ Upon receipt of an StartOfAllMarker, which indicates the start of any input links, send the StartOfUpstream to all downstream workers. @@ -256,7 +256,7 @@ def _process_start_of_any_marker(self, _: StartOfAny) -> None: self._output_queue.put(DataElement(tag=to, payload=batch)) self._check_and_process_control() - def _process_end_of_all_marker(self, _: EndOfAll) -> None: + def _process_end_of_output_ports(self, _: EndOfOutputPorts) -> None: """ Upon receipt of an EndOfAllMarker, which indicates the end of all input links, send the last data batches to all downstream workers. @@ -316,10 +316,10 @@ def _process_data_element(self, data_element: DataElement) -> None: self._process_end_of_upstream, SenderChange, self._process_sender_change_marker, - StartOfAny, - self._process_start_of_any_marker, - EndOfAll, - self._process_end_of_all_marker, + StartOfOutputPorts, + self._process_start_of_output_ports, + EndOfOutputPorts, + self._process_end_of_output_ports, State, self._process_state, ) From 760b590ac5bbf1be842c0f6f1f9a09d65379f617 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 16 Sep 2024 20:25:33 -0700 Subject: [PATCH 114/163] fix fmt --- .../core/architecture/managers/tuple_processing_manager.py | 4 ++-- .../architecture/sendsemantics/broad_cast_partitioner.py | 2 +- .../engine/architecture/worker/DataProcessorSpec.scala | 7 ++++++- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py index fcc9ce88552..a4b94f89d91 100644 --- a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py +++ b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py @@ -1,7 +1,7 @@ from threading import Event, Condition -from typing import Optional, Union, Tuple, Iterator +from typing import Optional, Tuple, Iterator -from core.models.marker import State, Marker +from core.models.marker import State from proto.edu.uci.ics.amber.engine.common import PortIdentity diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/broad_cast_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/broad_cast_partitioner.py index cc0fccbe3bc..407172975f0 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/broad_cast_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/broad_cast_partitioner.py @@ -5,7 +5,7 @@ from core.architecture.sendsemantics.partitioner import Partitioner from core.models import Tuple -from core.models.marker import EndOfUpstream, Marker +from core.models.marker import Marker from core.util import set_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import ( Partitioning, diff --git a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala index 491d72252c5..cdb65abebce 100644 --- a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala +++ b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala @@ -12,7 +12,12 @@ import edu.uci.ics.amber.engine.common.rpc.AsyncRPCClient.ControlInvocation import edu.uci.ics.amber.engine.common.rpc.AsyncRPCServer.ControlCommand import edu.uci.ics.amber.engine.common.tuple.amber.TupleLike import edu.uci.ics.amber.engine.common.virtualidentity.util.CONTROLLER -import edu.uci.ics.amber.engine.common.virtualidentity.{ActorVirtualIdentity, ChannelIdentity, OperatorIdentity, PhysicalOpIdentity} +import edu.uci.ics.amber.engine.common.virtualidentity.{ + ActorVirtualIdentity, + ChannelIdentity, + OperatorIdentity, + PhysicalOpIdentity +} import edu.uci.ics.amber.engine.common.workflow.PortIdentity import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfUpstream} import edu.uci.ics.texera.workflow.common.WorkflowContext.DEFAULT_WORKFLOW_ID From a2783f3d697d549785301f2bf005d4e510c1f55b Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 16 Sep 2024 20:29:15 -0700 Subject: [PATCH 115/163] fix fmt --- .../core/architecture/sendsemantics/partitioner.py | 4 ++-- .../sendsemantics/range_based_shuffle_partitioner.py | 2 +- .../sendsemantics/round_robin_partitioner.py | 4 ++-- .../src/main/python/core/runnables/data_processor.py | 2 +- core/amber/src/main/python/core/runnables/main_loop.py | 9 +++++---- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py index 7870da8d225..e2ff2df34c7 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/partitioner.py @@ -4,8 +4,8 @@ from betterproto import Message -from core.models import Tuple, State -from core.models.marker import EndOfUpstream, Marker +from core.models import Tuple +from core.models.marker import Marker from core.util import get_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import Partitioning from proto.edu.uci.ics.amber.engine.common import ActorVirtualIdentity diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/range_based_shuffle_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/range_based_shuffle_partitioner.py index cba8840823f..31d0ccc6f87 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/range_based_shuffle_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/range_based_shuffle_partitioner.py @@ -6,7 +6,7 @@ from core.architecture.sendsemantics.partitioner import Partitioner from core.models import Tuple -from core.models.marker import EndOfUpstream, Marker +from core.models.marker import Marker from core.util import set_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import ( RangeBasedShufflePartitioning, diff --git a/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py b/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py index 27fd5e6d07a..47011051f4e 100644 --- a/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py +++ b/core/amber/src/main/python/core/architecture/sendsemantics/round_robin_partitioner.py @@ -4,8 +4,8 @@ from overrides import overrides from core.architecture.sendsemantics.partitioner import Partitioner -from core.models import Tuple, State -from core.models.marker import EndOfUpstream, Marker +from core.models import Tuple +from core.models.marker import Marker from core.util import set_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import ( Partitioning, diff --git a/core/amber/src/main/python/core/runnables/data_processor.py b/core/amber/src/main/python/core/runnables/data_processor.py index a1e3e5950fb..d1222786fde 100644 --- a/core/amber/src/main/python/core/runnables/data_processor.py +++ b/core/amber/src/main/python/core/runnables/data_processor.py @@ -6,7 +6,7 @@ from loguru import logger from typing import Iterator, Optional from core.architecture.managers import Context -from core.models import Tuple, ExceptionInfo, State, TupleLike +from core.models import ExceptionInfo, State, TupleLike from core.models.marker import Marker, StartOfUpstream, EndOfUpstream from core.models.table import all_output_to_tuple from core.util import Stoppable diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index 77899a0e44a..dc58cae9ef1 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -1,7 +1,7 @@ import threading import time import typing -from typing import Iterator, Optional, Union +from typing import Iterator, Optional from loguru import logger from overrides import overrides @@ -150,8 +150,8 @@ def process_control_payload( def process_input_tuple(self) -> None: """ - Process the current input tuple or state with the current input link. Send all result - Tuples or State to downstream workers. + Process the current input tuple or state with the current input link. + Send all result Tuples or State to downstream workers. This is being invoked for each Tuple/Marker that are unpacked from the DataElement. @@ -247,7 +247,8 @@ def _process_sender_change_marker(self, sender_change_marker: SenderChange) -> N def _process_start_of_output_ports(self, _: StartOfOutputPorts) -> None: """ - Upon receipt of an StartOfAllMarker, which indicates the start of any input links, + Upon receipt of an StartOfAllMarker, + which indicates the start of any input links, send the StartOfUpstream to all downstream workers. :param _: StartOfAny Internal Marker From 4e8c0274ada3989915a54641a154e2b5411f9384 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Tue, 17 Sep 2024 16:59:04 -0700 Subject: [PATCH 116/163] update --- .../architecture/packaging/input_manager.py | 8 ++++---- .../python/core/runnables/network_receiver.py | 19 +++++++++---------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index 19380e7409d..6418ffc2055 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -2,17 +2,17 @@ from core.models import Tuple, ArrowTableTupleProvider, Schema from core.models.internal_marker import ( - EndOfOutputPorts, InternalMarker, - SenderChange, StartOfOutputPorts, + EndOfOutputPorts, + SenderChange ) from core.models.marker import EndOfUpstream, State, StartOfUpstream from core.models.payload import DataFrame, DataPayload, MarkerFrame from proto.edu.uci.ics.amber.engine.common import ( ActorVirtualIdentity, PortIdentity, - ChannelIdentity, + ChannelIdentity ) @@ -84,7 +84,7 @@ def register_input( def process_data_payload( self, from_: ActorVirtualIdentity, payload: DataPayload - ) -> Iterator[Union[Tuple, EndOfUpstream, InternalMarker]]: + ) -> Iterator[Union[Tuple, InternalMarker]]: # special case used to yield for source op if from_ == InputManager.SOURCE_STARTER: yield EndOfUpstream() diff --git a/core/amber/src/main/python/core/runnables/network_receiver.py b/core/amber/src/main/python/core/runnables/network_receiver.py index 0aec42718da..290019cf680 100644 --- a/core/amber/src/main/python/core/runnables/network_receiver.py +++ b/core/amber/src/main/python/core/runnables/network_receiver.py @@ -63,19 +63,18 @@ def data_handler(command: bytes, table: Table) -> int: :return: sender credits """ data_header = PythonDataHeader().parse(command) - payload_type = data_header.payload_type - if payload_type == "Data": - payload = DataFrame(table) - elif payload_type == "State": - payload = MarkerFrame(State(table)) - elif payload_type == "StartOfUpstream": - payload = MarkerFrame(StartOfUpstream()) - elif payload_type == "EndOfUpstream": - payload = MarkerFrame(EndOfUpstream()) + + payload_map = { + "Data": lambda table: DataFrame(table), + "State": lambda table: MarkerFrame(State(table)), + "StartOfUpstream": lambda _: MarkerFrame(StartOfUpstream()), + "EndOfUpstream": lambda _: MarkerFrame(EndOfUpstream()) + } + shared_queue.put( DataElement( tag=data_header.tag, - payload=payload, + payload=payload_map[data_header.payload_type](table), ) ) From fdf68df3d5816e67e771eb55b91bc6a61813b5c4 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Wed, 18 Sep 2024 00:07:32 -0700 Subject: [PATCH 117/163] update --- .../architecture/packaging/input_manager.py | 47 +++++++++++-------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index 6418ffc2055..f21e73b568a 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -12,7 +12,7 @@ from proto.edu.uci.ics.amber.engine.common import ( ActorVirtualIdentity, PortIdentity, - ChannelIdentity + ChannelIdentity, ) @@ -71,7 +71,7 @@ def get_port_id(self, channel_id: ChannelIdentity) -> PortIdentity: return self._channels[channel_id].port_id def register_input( - self, channel_id: ChannelIdentity, port_id: PortIdentity + self, channel_id: ChannelIdentity, port_id: PortIdentity ) -> None: if port_id.id is None: port_id.id = 0 @@ -83,42 +83,39 @@ def register_input( self._ports[port_id].add_channel(channel) def process_data_payload( - self, from_: ActorVirtualIdentity, payload: DataPayload + self, from_: ActorVirtualIdentity, payload: DataPayload ) -> Iterator[Union[Tuple, InternalMarker]]: # special case used to yield for source op if from_ == InputManager.SOURCE_STARTER: yield EndOfUpstream() yield EndOfOutputPorts() return - current_channel_id = None - for channel_id, channel in self._channels.items(): - if channel_id.from_worker_id == from_: - current_channel_id = channel_id + + current_channel_id = next( + (channel_id for channel_id, channel in self._channels.items() + if channel_id.from_worker_id == from_), None + ) if ( - self._current_channel_id is None - or self._current_channel_id != current_channel_id + self._current_channel_id is None + or self._current_channel_id != current_channel_id ): self._current_channel_id = current_channel_id yield SenderChange(current_channel_id) + if isinstance(payload, DataFrame): - for field_accessor in ArrowTableTupleProvider(payload.frame): - yield Tuple( - {name: field_accessor for name in payload.frame.column_names}, - schema=self._ports[ - self._channels[self._current_channel_id].port_id - ].get_schema(), - ) + elif isinstance(payload, MarkerFrame): - if isinstance(payload.frame, State): - yield payload.frame - if isinstance(payload.frame, StartOfUpstream): # StartOfInputChannel() + frame = payload.frame + if isinstance(frame, State): + yield frame + if isinstance(frame, StartOfUpstream): # StartOfInputChannel() if not self.started: yield StartOfOutputPorts() self.started = True yield StartOfUpstream() # StartOfInputChannel() - if isinstance(payload.frame, EndOfUpstream): # EndOfInputChannel() + if isinstance(frame, EndOfUpstream): # EndOfInputChannel() channel = self._channels[self._current_channel_id] channel.complete() port_id = channel.port_id @@ -141,3 +138,13 @@ def process_data_payload( else: raise NotImplementedError() + + def _process_data(self, frame: Table) -> Iterator[Tuple]: + schema = self._ports[ + self._channels[self._current_channel_id].port_id + ].get_schema() + for field_accessor in ArrowTableTupleProvider(payload.frame): + yield Tuple( + {name: field_accessor for name in payload.frame.column_names}, + schema=schema + ) From f0e028e70d5f4aac421836518ed33f9749bb4234 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 18 Sep 2024 00:23:19 -0700 Subject: [PATCH 118/163] fix fmt --- .../architecture/packaging/input_manager.py | 72 +++++++++---------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index f21e73b568a..a464c280d48 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -1,5 +1,5 @@ from typing import Iterator, Optional, Union, Dict, List - +from pyarrow.lib import Table from core.models import Tuple, ArrowTableTupleProvider, Schema from core.models.internal_marker import ( InternalMarker, @@ -7,7 +7,7 @@ EndOfOutputPorts, SenderChange ) -from core.models.marker import EndOfUpstream, State, StartOfUpstream +from core.models.marker import EndOfUpstream, State, StartOfUpstream, Marker from core.models.payload import DataFrame, DataPayload, MarkerFrame from proto.edu.uci.ics.amber.engine.common import ( ActorVirtualIdentity, @@ -104,47 +104,47 @@ def process_data_payload( yield SenderChange(current_channel_id) if isinstance(payload, DataFrame): - - + yield from self._process_data(payload.frame) elif isinstance(payload, MarkerFrame): - frame = payload.frame - if isinstance(frame, State): - yield frame - if isinstance(frame, StartOfUpstream): # StartOfInputChannel() - if not self.started: - yield StartOfOutputPorts() - self.started = True - yield StartOfUpstream() # StartOfInputChannel() - if isinstance(frame, EndOfUpstream): # EndOfInputChannel() - channel = self._channels[self._current_channel_id] - channel.complete() - port_id = channel.port_id - port_completed = all( - map( - lambda channel: channel.is_completed(), - self._ports[port_id].channels, - ) - ) - - if port_completed: - yield EndOfUpstream() # EndOfInputPort() - - all_ports_completed = all( - map(lambda port: port.is_completed(), self._ports.values()) - ) - - if all_ports_completed: - yield EndOfOutputPorts() - + yield from self._process_marker(payload.frame) else: raise NotImplementedError() - def _process_data(self, frame: Table) -> Iterator[Tuple]: + def _process_data(self, table: Table) -> Iterator[Tuple]: schema = self._ports[ self._channels[self._current_channel_id].port_id ].get_schema() - for field_accessor in ArrowTableTupleProvider(payload.frame): + for field_accessor in ArrowTableTupleProvider(table): yield Tuple( - {name: field_accessor for name in payload.frame.column_names}, + {name: field_accessor for name in table.column_names}, schema=schema ) + + def _process_marker(self, marker: Marker) -> Iterator[InternalMarker]: + if isinstance(marker, State): + yield marker + if isinstance(marker, StartOfUpstream): # StartOfInputChannel() + if not self.started: + yield StartOfOutputPorts() + self.started = True + yield StartOfUpstream() # StartOfInputChannel() + if isinstance(marker, EndOfUpstream): # EndOfInputChannel() + channel = self._channels[self._current_channel_id] + channel.complete() + port_id = channel.port_id + port_completed = all( + map( + lambda channel: channel.is_completed(), + self._ports[port_id].channels, + ) + ) + + if port_completed: + yield EndOfUpstream() # EndOfInputPort() + + all_ports_completed = all( + map(lambda port: port.is_completed(), self._ports.values()) + ) + + if all_ports_completed: + yield EndOfOutputPorts() \ No newline at end of file From efa22ffad8c58df8689a621630327053fe189528 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Wed, 18 Sep 2024 00:28:33 -0700 Subject: [PATCH 119/163] update --- .../main/python/core/architecture/packaging/input_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index a464c280d48..a69138693b4 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -147,4 +147,4 @@ def _process_marker(self, marker: Marker) -> Iterator[InternalMarker]: ) if all_ports_completed: - yield EndOfOutputPorts() \ No newline at end of file + yield EndOfOutputPorts() From e3ff682db2118cf2fd782c1d5f831987e2904f56 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Wed, 18 Sep 2024 00:28:55 -0700 Subject: [PATCH 120/163] update --- .../architecture/managers/pause_manager.py | 6 ++--- .../architecture/packaging/input_manager.py | 21 +++++++++------- .../src/main/python/core/models/tuple.py | 6 +++-- .../python/core/runnables/network_receiver.py | 2 +- .../linked_blocking_multi_queue.py | 6 ++--- .../proto/edu/uci/ics/texera/web/__init__.py | 24 +++++++++---------- .../src/main/python/proto/scalapb/__init__.py | 18 +++++++------- 7 files changed, 44 insertions(+), 39 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/managers/pause_manager.py b/core/amber/src/main/python/core/architecture/managers/pause_manager.py index 307640f8b7d..fa74b4006d7 100644 --- a/core/amber/src/main/python/core/architecture/managers/pause_manager.py +++ b/core/amber/src/main/python/core/architecture/managers/pause_manager.py @@ -31,9 +31,9 @@ def __init__( ): self._input_queue: InternalQueue = input_queue self._global_pauses: Set[PauseType] = set() - self._specific_input_pauses: Dict[PauseType, Set[ActorVirtualIdentity]] = ( - defaultdict(set) - ) + self._specific_input_pauses: Dict[ + PauseType, Set[ActorVirtualIdentity] + ] = defaultdict(set) self._state_manager = state_manager def pause(self, pause_type: PauseType, change_state=True) -> None: diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index a69138693b4..b58e489ff61 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -5,7 +5,7 @@ InternalMarker, StartOfOutputPorts, EndOfOutputPorts, - SenderChange + SenderChange, ) from core.models.marker import EndOfUpstream, State, StartOfUpstream, Marker from core.models.payload import DataFrame, DataPayload, MarkerFrame @@ -71,7 +71,7 @@ def get_port_id(self, channel_id: ChannelIdentity) -> PortIdentity: return self._channels[channel_id].port_id def register_input( - self, channel_id: ChannelIdentity, port_id: PortIdentity + self, channel_id: ChannelIdentity, port_id: PortIdentity ) -> None: if port_id.id is None: port_id.id = 0 @@ -83,7 +83,7 @@ def register_input( self._ports[port_id].add_channel(channel) def process_data_payload( - self, from_: ActorVirtualIdentity, payload: DataPayload + self, from_: ActorVirtualIdentity, payload: DataPayload ) -> Iterator[Union[Tuple, InternalMarker]]: # special case used to yield for source op if from_ == InputManager.SOURCE_STARTER: @@ -92,13 +92,17 @@ def process_data_payload( return current_channel_id = next( - (channel_id for channel_id, channel in self._channels.items() - if channel_id.from_worker_id == from_), None + ( + channel_id + for channel_id, channel in self._channels.items() + if channel_id.from_worker_id == from_ + ), + None, ) if ( - self._current_channel_id is None - or self._current_channel_id != current_channel_id + self._current_channel_id is None + or self._current_channel_id != current_channel_id ): self._current_channel_id = current_channel_id yield SenderChange(current_channel_id) @@ -116,8 +120,7 @@ def _process_data(self, table: Table) -> Iterator[Tuple]: ].get_schema() for field_accessor in ArrowTableTupleProvider(table): yield Tuple( - {name: field_accessor for name in table.column_names}, - schema=schema + {name: field_accessor for name in table.column_names}, schema=schema ) def _process_marker(self, marker: Marker) -> Iterator[InternalMarker]: diff --git a/core/amber/src/main/python/core/models/tuple.py b/core/amber/src/main/python/core/models/tuple.py index 0b783cafda8..10b079d5a6e 100644 --- a/core/amber/src/main/python/core/models/tuple.py +++ b/core/amber/src/main/python/core/models/tuple.py @@ -19,9 +19,11 @@ @runtime_checkable class TupleLike(Protocol): - def __getitem__(self, item: typing.Union[str, int]) -> Field: ... + def __getitem__(self, item: typing.Union[str, int]) -> Field: + ... - def __setitem__(self, key: typing.Union[str, int], value: Field) -> None: ... + def __setitem__(self, key: typing.Union[str, int], value: Field) -> None: + ... class ArrowTableTupleProvider: diff --git a/core/amber/src/main/python/core/runnables/network_receiver.py b/core/amber/src/main/python/core/runnables/network_receiver.py index 290019cf680..e0463dcb5f1 100644 --- a/core/amber/src/main/python/core/runnables/network_receiver.py +++ b/core/amber/src/main/python/core/runnables/network_receiver.py @@ -68,7 +68,7 @@ def data_handler(command: bytes, table: Table) -> int: "Data": lambda table: DataFrame(table), "State": lambda table: MarkerFrame(State(table)), "StartOfUpstream": lambda _: MarkerFrame(StartOfUpstream()), - "EndOfUpstream": lambda _: MarkerFrame(EndOfUpstream()) + "EndOfUpstream": lambda _: MarkerFrame(EndOfUpstream()), } shared_queue.put( diff --git a/core/amber/src/main/python/core/util/customized_queue/linked_blocking_multi_queue.py b/core/amber/src/main/python/core/util/customized_queue/linked_blocking_multi_queue.py index 614a6b88198..ec15d2785ff 100644 --- a/core/amber/src/main/python/core/util/customized_queue/linked_blocking_multi_queue.py +++ b/core/amber/src/main/python/core/util/customized_queue/linked_blocking_multi_queue.py @@ -224,9 +224,9 @@ class DefaultSubQueueSelection(Generic[T]): def __init__( self, priority_groups: List[LinkedBlockingMultiQueue.PriorityGroup[T]] ): - self.priority_groups: List[LinkedBlockingMultiQueue.PriorityGroup[T]] = ( - priority_groups - ) + self.priority_groups: List[ + LinkedBlockingMultiQueue.PriorityGroup[T] + ] = priority_groups def get_next(self) -> Optional[LinkedBlockingMultiQueue.SubQueue[T]]: for pg in self.priority_groups: diff --git a/core/amber/src/main/python/proto/edu/uci/ics/texera/web/__init__.py b/core/amber/src/main/python/proto/edu/uci/ics/texera/web/__init__.py index 8979cc0b8f1..8f3d9bc34a5 100644 --- a/core/amber/src/main/python/proto/edu/uci/ics/texera/web/__init__.py +++ b/core/amber/src/main/python/proto/edu/uci/ics/texera/web/__init__.py @@ -61,16 +61,16 @@ class ExecutionBreakpointStore(betterproto.Message): @dataclass(eq=False, repr=False) class EvaluatedValueList(betterproto.Message): - values: List["__amber_engine_architecture_worker__.EvaluatedValue"] = ( - betterproto.message_field(1) - ) + values: List[ + "__amber_engine_architecture_worker__.EvaluatedValue" + ] = betterproto.message_field(1) @dataclass(eq=False, repr=False) class OperatorConsole(betterproto.Message): - console_messages: List["__amber_engine_architecture_worker__.ConsoleMessage"] = ( - betterproto.message_field(1) - ) + console_messages: List[ + "__amber_engine_architecture_worker__.ConsoleMessage" + ] = betterproto.message_field(1) evaluate_expr_results: Dict[str, "EvaluatedValueList"] = betterproto.map_field( 2, betterproto.TYPE_STRING, betterproto.TYPE_MESSAGE ) @@ -91,12 +91,12 @@ class OperatorWorkerMapping(betterproto.Message): @dataclass(eq=False, repr=False) class OperatorStatistics(betterproto.Message): - input_count: List["__amber_engine_architecture_worker__.PortTupleCountMapping"] = ( - betterproto.message_field(1) - ) - output_count: List["__amber_engine_architecture_worker__.PortTupleCountMapping"] = ( - betterproto.message_field(2) - ) + input_count: List[ + "__amber_engine_architecture_worker__.PortTupleCountMapping" + ] = betterproto.message_field(1) + output_count: List[ + "__amber_engine_architecture_worker__.PortTupleCountMapping" + ] = betterproto.message_field(2) num_workers: int = betterproto.int32_field(3) data_processing_time: int = betterproto.int64_field(4) control_processing_time: int = betterproto.int64_field(5) diff --git a/core/amber/src/main/python/proto/scalapb/__init__.py b/core/amber/src/main/python/proto/scalapb/__init__.py index 51a1655804e..270d1d85d23 100644 --- a/core/amber/src/main/python/proto/scalapb/__init__.py +++ b/core/amber/src/main/python/proto/scalapb/__init__.py @@ -143,14 +143,14 @@ class ScalaPbOptions(betterproto.Message): java_conversions: bool = betterproto.bool_field(23) """Enable java conversions for this file.""" - aux_message_options: List["ScalaPbOptionsAuxMessageOptions"] = ( - betterproto.message_field(18) - ) + aux_message_options: List[ + "ScalaPbOptionsAuxMessageOptions" + ] = betterproto.message_field(18) """List of message options to apply to some messages.""" - aux_field_options: List["ScalaPbOptionsAuxFieldOptions"] = ( - betterproto.message_field(19) - ) + aux_field_options: List[ + "ScalaPbOptionsAuxFieldOptions" + ] = betterproto.message_field(19) """List of message options to apply to some fields.""" aux_enum_options: List["ScalaPbOptionsAuxEnumOptions"] = betterproto.message_field( @@ -158,9 +158,9 @@ class ScalaPbOptions(betterproto.Message): ) """List of message options to apply to some enums.""" - aux_enum_value_options: List["ScalaPbOptionsAuxEnumValueOptions"] = ( - betterproto.message_field(22) - ) + aux_enum_value_options: List[ + "ScalaPbOptionsAuxEnumValueOptions" + ] = betterproto.message_field(22) """List of enum value options to apply to some enum values.""" preprocessors: List[str] = betterproto.string_field(24) From f4f5318e6639e41222d0c24eaafa168cafc6093d Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 18 Sep 2024 01:55:15 -0700 Subject: [PATCH 121/163] fix fmt --- .../architecture/managers/pause_manager.py | 6 ++--- .../src/main/python/core/models/tuple.py | 6 ++--- .../linked_blocking_multi_queue.py | 6 ++--- .../proto/edu/uci/ics/texera/web/__init__.py | 24 +++++++++---------- .../src/main/python/proto/scalapb/__init__.py | 18 +++++++------- 5 files changed, 29 insertions(+), 31 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/managers/pause_manager.py b/core/amber/src/main/python/core/architecture/managers/pause_manager.py index fa74b4006d7..307640f8b7d 100644 --- a/core/amber/src/main/python/core/architecture/managers/pause_manager.py +++ b/core/amber/src/main/python/core/architecture/managers/pause_manager.py @@ -31,9 +31,9 @@ def __init__( ): self._input_queue: InternalQueue = input_queue self._global_pauses: Set[PauseType] = set() - self._specific_input_pauses: Dict[ - PauseType, Set[ActorVirtualIdentity] - ] = defaultdict(set) + self._specific_input_pauses: Dict[PauseType, Set[ActorVirtualIdentity]] = ( + defaultdict(set) + ) self._state_manager = state_manager def pause(self, pause_type: PauseType, change_state=True) -> None: diff --git a/core/amber/src/main/python/core/models/tuple.py b/core/amber/src/main/python/core/models/tuple.py index 10b079d5a6e..0b783cafda8 100644 --- a/core/amber/src/main/python/core/models/tuple.py +++ b/core/amber/src/main/python/core/models/tuple.py @@ -19,11 +19,9 @@ @runtime_checkable class TupleLike(Protocol): - def __getitem__(self, item: typing.Union[str, int]) -> Field: - ... + def __getitem__(self, item: typing.Union[str, int]) -> Field: ... - def __setitem__(self, key: typing.Union[str, int], value: Field) -> None: - ... + def __setitem__(self, key: typing.Union[str, int], value: Field) -> None: ... class ArrowTableTupleProvider: diff --git a/core/amber/src/main/python/core/util/customized_queue/linked_blocking_multi_queue.py b/core/amber/src/main/python/core/util/customized_queue/linked_blocking_multi_queue.py index ec15d2785ff..614a6b88198 100644 --- a/core/amber/src/main/python/core/util/customized_queue/linked_blocking_multi_queue.py +++ b/core/amber/src/main/python/core/util/customized_queue/linked_blocking_multi_queue.py @@ -224,9 +224,9 @@ class DefaultSubQueueSelection(Generic[T]): def __init__( self, priority_groups: List[LinkedBlockingMultiQueue.PriorityGroup[T]] ): - self.priority_groups: List[ - LinkedBlockingMultiQueue.PriorityGroup[T] - ] = priority_groups + self.priority_groups: List[LinkedBlockingMultiQueue.PriorityGroup[T]] = ( + priority_groups + ) def get_next(self) -> Optional[LinkedBlockingMultiQueue.SubQueue[T]]: for pg in self.priority_groups: diff --git a/core/amber/src/main/python/proto/edu/uci/ics/texera/web/__init__.py b/core/amber/src/main/python/proto/edu/uci/ics/texera/web/__init__.py index 8f3d9bc34a5..8979cc0b8f1 100644 --- a/core/amber/src/main/python/proto/edu/uci/ics/texera/web/__init__.py +++ b/core/amber/src/main/python/proto/edu/uci/ics/texera/web/__init__.py @@ -61,16 +61,16 @@ class ExecutionBreakpointStore(betterproto.Message): @dataclass(eq=False, repr=False) class EvaluatedValueList(betterproto.Message): - values: List[ - "__amber_engine_architecture_worker__.EvaluatedValue" - ] = betterproto.message_field(1) + values: List["__amber_engine_architecture_worker__.EvaluatedValue"] = ( + betterproto.message_field(1) + ) @dataclass(eq=False, repr=False) class OperatorConsole(betterproto.Message): - console_messages: List[ - "__amber_engine_architecture_worker__.ConsoleMessage" - ] = betterproto.message_field(1) + console_messages: List["__amber_engine_architecture_worker__.ConsoleMessage"] = ( + betterproto.message_field(1) + ) evaluate_expr_results: Dict[str, "EvaluatedValueList"] = betterproto.map_field( 2, betterproto.TYPE_STRING, betterproto.TYPE_MESSAGE ) @@ -91,12 +91,12 @@ class OperatorWorkerMapping(betterproto.Message): @dataclass(eq=False, repr=False) class OperatorStatistics(betterproto.Message): - input_count: List[ - "__amber_engine_architecture_worker__.PortTupleCountMapping" - ] = betterproto.message_field(1) - output_count: List[ - "__amber_engine_architecture_worker__.PortTupleCountMapping" - ] = betterproto.message_field(2) + input_count: List["__amber_engine_architecture_worker__.PortTupleCountMapping"] = ( + betterproto.message_field(1) + ) + output_count: List["__amber_engine_architecture_worker__.PortTupleCountMapping"] = ( + betterproto.message_field(2) + ) num_workers: int = betterproto.int32_field(3) data_processing_time: int = betterproto.int64_field(4) control_processing_time: int = betterproto.int64_field(5) diff --git a/core/amber/src/main/python/proto/scalapb/__init__.py b/core/amber/src/main/python/proto/scalapb/__init__.py index 270d1d85d23..51a1655804e 100644 --- a/core/amber/src/main/python/proto/scalapb/__init__.py +++ b/core/amber/src/main/python/proto/scalapb/__init__.py @@ -143,14 +143,14 @@ class ScalaPbOptions(betterproto.Message): java_conversions: bool = betterproto.bool_field(23) """Enable java conversions for this file.""" - aux_message_options: List[ - "ScalaPbOptionsAuxMessageOptions" - ] = betterproto.message_field(18) + aux_message_options: List["ScalaPbOptionsAuxMessageOptions"] = ( + betterproto.message_field(18) + ) """List of message options to apply to some messages.""" - aux_field_options: List[ - "ScalaPbOptionsAuxFieldOptions" - ] = betterproto.message_field(19) + aux_field_options: List["ScalaPbOptionsAuxFieldOptions"] = ( + betterproto.message_field(19) + ) """List of message options to apply to some fields.""" aux_enum_options: List["ScalaPbOptionsAuxEnumOptions"] = betterproto.message_field( @@ -158,9 +158,9 @@ class ScalaPbOptions(betterproto.Message): ) """List of message options to apply to some enums.""" - aux_enum_value_options: List[ - "ScalaPbOptionsAuxEnumValueOptions" - ] = betterproto.message_field(22) + aux_enum_value_options: List["ScalaPbOptionsAuxEnumValueOptions"] = ( + betterproto.message_field(22) + ) """List of enum value options to apply to some enum values.""" preprocessors: List[str] = betterproto.string_field(24) From 2e136f21e60527c1b676f4db995f688f7cab243c Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 18 Sep 2024 02:35:39 -0700 Subject: [PATCH 122/163] fix fmt --- .../uci/ics/texera/workflow/common/operators/LogicalOp.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala index b3c23f5f2bc..14a433170f3 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/operators/LogicalOp.scala @@ -134,8 +134,11 @@ import edu.uci.ics.texera.workflow.operators.visualization.scatter3DChart.Scatte import edu.uci.ics.texera.workflow.operators.visualization.ScatterMatrixChart.ScatterMatrixChartOpDesc import edu.uci.ics.texera.workflow.operators.visualization.funnelPlot.FunnelPlotOpDesc import edu.uci.ics.texera.workflow.operators.visualization.tablesChart.TablesPlotOpDesc + import edu.uci.ics.texera.workflow.operators.visualization.icicleChart.IcicleChartOpDesc + import edu.uci.ics.texera.workflow.operators.visualization.continuousErrorBands.ContinuousErrorBandsOpDesc + import edu.uci.ics.texera.workflow.operators.visualization.ternaryPlot.TernaryPlotOpDesc import org.apache.commons.lang3.builder.{EqualsBuilder, HashCodeBuilder, ToStringBuilder} import org.apache.zookeeper.KeeperException.UnimplementedException @@ -143,7 +146,6 @@ import edu.uci.ics.texera.workflow.operators.machineLearning.Scorer.MachineLearn import edu.uci.ics.texera.workflow.operators.visualization.quiverPlot.QuiverPlotOpDesc import edu.uci.ics.texera.workflow.operators.visualization.contourPlot.ContourPlotOpDesc import edu.uci.ics.texera.workflow.operators.visualization.figureFactoryTable.FigureFactoryTableOpDesc - import java.util.UUID import scala.collection.mutable import scala.util.Try From cff6f57ff45a392c242d1b9d80ec257c98cdc340 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 18 Sep 2024 03:15:14 -0700 Subject: [PATCH 123/163] fix fmt --- .../uci/ics/amber/engine/architecture/worker/DataProcessor.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index fc4f2002e18..e93fc84b68d 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -181,7 +181,6 @@ class DataProcessor( ) asyncRPCClient.send(WorkerExecutionCompleted(), CONTROLLER) case FinalizePort(portId, input) => - //operator.onOutputFinish(portId.id) asyncRPCClient.send(PortCompleted(portId, input), CONTROLLER) case schemaEnforceable: SchemaEnforceable => if (outputPortOpt.isEmpty) { From 637228882306e7173c066e7b781ff1236f671bc8 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Wed, 18 Sep 2024 03:28:44 -0700 Subject: [PATCH 124/163] update --- .../amber/engine/architecture/worker/DataProcessorSpec.scala | 4 ---- 1 file changed, 4 deletions(-) diff --git a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala index cdb65abebce..c39dd34a8da 100644 --- a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala +++ b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala @@ -147,10 +147,6 @@ class DataProcessorSpec extends AnyFlatSpec with MockFactory with BeforeAndAfter ChannelIdentity(CONTROLLER, testWorkerId, isControl = true), ControlInvocation(0, OpenExecutor()) ) - dp.processDataPayload( - ChannelIdentity(senderWorkerId, testWorkerId, isControl = false), - MarkerFrame(StartOfUpstream()) - ) dp.processDataPayload( ChannelIdentity(senderWorkerId, testWorkerId, isControl = false), DataFrame(tuples) From 990a72095c2f6bac6d09e22d1ef6c0c9ff7b01e9 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Wed, 18 Sep 2024 04:23:30 -0700 Subject: [PATCH 125/163] update --- .../engine/architecture/worker/DataProcessor.scala | 2 +- .../engine/architecture/worker/DataProcessorSpec.scala | 10 +++++++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index e93fc84b68d..240ccb5719b 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -131,7 +131,7 @@ class DataProcessor( private[this] def processEndOfUpstream(portId: Int): Unit = { try { val outputState = executor.produceStateOnFinish(portId) - if (outputState.isDefined) { + if (outputState != null && outputState.isDefined) { outputManager.emitMarker(outputState.get) } outputManager.outputIterator.setTupleOutput( diff --git a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala index c39dd34a8da..4e25be5650b 100644 --- a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala +++ b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala @@ -19,7 +19,7 @@ import edu.uci.ics.amber.engine.common.virtualidentity.{ PhysicalOpIdentity } import edu.uci.ics.amber.engine.common.workflow.PortIdentity -import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfUpstream} +import edu.uci.ics.texera.workflow.common.EndOfUpstream import edu.uci.ics.texera.workflow.common.WorkflowContext.DEFAULT_WORKFLOW_ID import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple @@ -130,6 +130,12 @@ class DataProcessorSpec extends AnyFlatSpec with MockFactory with BeforeAndAfter ) .expects(x, 0) } + ( + ( + input: Int + ) => executor.produceStateOnFinish(input) + ) + .expects(0) ( ( input: Int @@ -159,7 +165,9 @@ class DataProcessorSpec extends AnyFlatSpec with MockFactory with BeforeAndAfter dp.continueDataProcessing() } (adaptiveBatchingMonitor.stopAdaptiveBatching _).expects().once() + (executor.close _).expects().once() + dp.processDataPayload( ChannelIdentity(senderWorkerId, testWorkerId, isControl = false), MarkerFrame(EndOfUpstream()) From 6e50be14464d5754420c5c47b1558274bd4f4870 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Wed, 18 Sep 2024 04:25:55 -0700 Subject: [PATCH 126/163] update --- .../engine/architecture/worker/DataProcessorSpec.scala | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala index 4e25be5650b..11260b09b5e 100644 --- a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala +++ b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala @@ -78,6 +78,12 @@ class DataProcessorSpec extends AnyFlatSpec with MockFactory with BeforeAndAfter ) .expects(x, 0) } + ( + ( + input: Int + ) => executor.produceStateOnFinish(input) + ) + .expects(0) ( ( input: Int From 8889e61507b5f1a54d76aaa710cd775925b11dd8 Mon Sep 17 00:00:00 2001 From: Xinyuan Lin Date: Wed, 18 Sep 2024 04:33:17 -0700 Subject: [PATCH 127/163] update --- .../engine/architecture/worker/DataProcessorSpec.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala index 11260b09b5e..57eaeed1857 100644 --- a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala +++ b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala @@ -79,10 +79,10 @@ class DataProcessorSpec extends AnyFlatSpec with MockFactory with BeforeAndAfter .expects(x, 0) } ( - ( - input: Int - ) => executor.produceStateOnFinish(input) - ) + ( + input: Int + ) => executor.produceStateOnFinish(input) + ) .expects(0) ( ( From 183d21daf7b18d1201f273eecb4d0abdfd8c10d1 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 18 Sep 2024 23:39:51 -0700 Subject: [PATCH 128/163] fix --- .../core/architecture/managers/tuple_processing_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py index a4b94f89d91..69fe5289ea3 100644 --- a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py +++ b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py @@ -14,7 +14,7 @@ def __init__(self): self.context_switch_condition: Condition = Condition() self.finished_current: Event = Event() - def get_input_tuple(self) -> Optional[State]: + def get_input_tuple(self) -> Optional[Tuple]: ret, self.current_input_tuple = self.current_input_tuple, None return ret From a0a34a12ec358d489c23d442ee9c179b69a1e6eb Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 18 Sep 2024 23:42:53 -0700 Subject: [PATCH 129/163] fix --- .../architecture/managers/tuple_processing_manager.py | 11 ++++------- .../src/main/python/core/runnables/data_processor.py | 4 ++-- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py index 69fe5289ea3..574644a2800 100644 --- a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py +++ b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py @@ -22,12 +22,9 @@ def get_output_tuple(self) -> Optional[Tuple]: ret, self.current_output_tuple = self.current_output_tuple, None return ret - def get_input_port(self) -> int: + def get_input_port_id(self) -> int: port_id = self.current_input_port_id - port: int + # no upstream, special case for source executor. if port_id is None: - # no upstream, special case for source executor. - port = 0 - else: - port = port_id.id - return port + return 0 + return port_id.id \ No newline at end of file diff --git a/core/amber/src/main/python/core/runnables/data_processor.py b/core/amber/src/main/python/core/runnables/data_processor.py index d1222786fde..5c3b9f7354d 100644 --- a/core/amber/src/main/python/core/runnables/data_processor.py +++ b/core/amber/src/main/python/core/runnables/data_processor.py @@ -43,7 +43,7 @@ def run(self) -> None: def process_marker(self, marker: Marker) -> None: try: executor = self._context.executor_manager.executor - port = self._context.tuple_processing_manager.get_input_port() + port = self._context.tuple_processing_manager.get_input_port_id() with replace_print( self._context.worker_id, self._context.console_message_manager.print_buf, @@ -70,7 +70,7 @@ def process_tuple(self) -> None: while not finished_current.is_set(): try: executor = self._context.executor_manager.executor - port = self._context.tuple_processing_manager.get_input_port() + port = self._context.tuple_processing_manager.get_input_port_id() tuple_ = self._context.tuple_processing_manager.get_input_tuple() with replace_print( self._context.worker_id, From 3316f479c4cda06a3248dd01a7246378115d6c15 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Wed, 18 Sep 2024 23:55:05 -0700 Subject: [PATCH 130/163] fix --- core/amber/src/main/python/core/models/payload.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/amber/src/main/python/core/models/payload.py b/core/amber/src/main/python/core/models/payload.py index 9bc4994f8d6..2977528a58f 100644 --- a/core/amber/src/main/python/core/models/payload.py +++ b/core/amber/src/main/python/core/models/payload.py @@ -1,7 +1,5 @@ from dataclasses import dataclass - from pyarrow.lib import Table - from core.models.marker import Marker From e0a856bca48be060bbfed8bc20b3c331ae180d7d Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 00:01:18 -0700 Subject: [PATCH 131/163] fix --- .../main/python/core/runnables/data_processor.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/core/amber/src/main/python/core/runnables/data_processor.py b/core/amber/src/main/python/core/runnables/data_processor.py index 5c3b9f7354d..454d17ba636 100644 --- a/core/amber/src/main/python/core/runnables/data_processor.py +++ b/core/amber/src/main/python/core/runnables/data_processor.py @@ -43,18 +43,18 @@ def run(self) -> None: def process_marker(self, marker: Marker) -> None: try: executor = self._context.executor_manager.executor - port = self._context.tuple_processing_manager.get_input_port_id() + port_id = self._context.tuple_processing_manager.get_input_port_id() with replace_print( self._context.worker_id, self._context.console_message_manager.print_buf, ): if isinstance(marker, StartOfUpstream): - self._set_output_state(executor.produce_state_on_start(port)) + self._set_output_state(executor.produce_state_on_start(port_id)) elif isinstance(marker, State): - self._set_output_state(executor.process_state(marker, port)) + self._set_output_state(executor.process_state(marker, port_id)) elif isinstance(marker, EndOfUpstream): - self._set_output_state(executor.produce_state_on_finish(port)) - self._set_output_tuple(executor.on_finish(port)) + self._set_output_state(executor.produce_state_on_finish(port_id)) + self._set_output_tuple(executor.on_finish(port_id)) except Exception as err: logger.exception(err) @@ -70,13 +70,13 @@ def process_tuple(self) -> None: while not finished_current.is_set(): try: executor = self._context.executor_manager.executor - port = self._context.tuple_processing_manager.get_input_port_id() + port_id = self._context.tuple_processing_manager.get_input_port_id() tuple_ = self._context.tuple_processing_manager.get_input_tuple() with replace_print( self._context.worker_id, self._context.console_message_manager.print_buf, ): - self._set_output_tuple(executor.process_tuple(tuple_, port)) + self._set_output_tuple(executor.process_tuple(tuple_, port_id)) except Exception as err: logger.exception(err) From d91c16758b43fcfc4c9affad08547780f47f5187 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 00:05:13 -0700 Subject: [PATCH 132/163] fix --- .../main/scala/edu/uci/ics/texera/workflow/common/Marker.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index a652746b336..403afc485b3 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -43,8 +43,6 @@ final case class State(tuple: Option[Tuple] = None, passToAllDownstream: Boolean .addSequentially(list.values.map(_._2).toArray) .build() - def size: Int = list.size - override def toString: String = list.map { case (key, (_, value)) => s"$key: $value" }.mkString(", ") } From 600934fa107b1ee84b97d398ab8f447acecb678f Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 00:10:24 -0700 Subject: [PATCH 133/163] fix --- core/amber/src/main/python/core/runnables/main_loop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index dc58cae9ef1..1f4926cef53 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -150,7 +150,7 @@ def process_control_payload( def process_input_tuple(self) -> None: """ - Process the current input tuple or state with the current input link. + Process the current input tuple with the current input link. Send all result Tuples or State to downstream workers. This is being invoked for each Tuple/Marker that are unpacked from the From 7f511dffe25f117913d9c241239a415d7c0b9508 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 00:19:40 -0700 Subject: [PATCH 134/163] fix --- core/amber/src/main/python/core/runnables/main_loop.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index 1f4926cef53..f9557799440 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -161,14 +161,14 @@ def process_input_tuple(self) -> None: self.context.tuple_processing_manager.current_input_port_id ) - for output_data in self.process_tuple_with_udf(): + for output_tuple in self.process_tuple_with_udf(): self._check_and_process_control() - if output_data is not None: + if output_tuple is not None: self.context.statistics_manager.increase_output_tuple_count( PortIdentity(0) ) for to, batch in self.context.output_manager.tuple_to_batch( - output_data + output_tuple ): self._output_queue.put(DataElement(tag=to, payload=batch)) @@ -186,7 +186,7 @@ def process_tuple_with_udf(self) -> Iterator[Optional[Tuple]]: This is a wrapper to invoke processing of the executor. - :return: Iterator[Tuple], iterator of result Tuple(s) or State. + :return: Iterator[Tuple], iterator of result Tuple(s). """ finished_current = self.context.tuple_processing_manager.finished_current finished_current.clear() From 47f88e6299c74ee1e333f0026bfc03ee31372deb Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 00:21:48 -0700 Subject: [PATCH 135/163] fix --- core/amber/src/main/python/core/runnables/main_loop.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index f9557799440..14d3f76e3c4 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -209,16 +209,16 @@ def _process_tuple(self, tuple_: Tuple) -> None: self.process_input_tuple() self._check_and_process_control() - def _process_state(self, state_: State): + def _process_state(self, state_: State) -> None: self.context.marker_processing_manager.current_input_marker = state_ self.process_input_state() self._check_and_process_control() - def _process_start_of_upstream(self, start_of_upstream: StartOfUpstream): + def _process_start_of_upstream(self, start_of_upstream: StartOfUpstream) -> None: self.context.marker_processing_manager.current_input_marker = start_of_upstream self.process_input_state() - def _process_end_of_upstream(self, end_of_upstream: EndOfUpstream): + def _process_end_of_upstream(self, end_of_upstream: EndOfUpstream) -> None: self.context.marker_processing_manager.current_input_marker = end_of_upstream self.process_input_state() self.process_input_tuple() @@ -264,7 +264,7 @@ def _process_end_of_output_ports(self, _: EndOfOutputPorts) -> None: It will also invoke complete() of this DataProcessor. - :param _: EndOfAllMarker + :param _: EndOfOutputPorts """ for to, batch in self.context.output_manager.emit_marker(EndOfUpstream()): self._output_queue.put(DataElement(tag=to, payload=batch)) From bf159cc5f494647c543908b5f4533674a4c31331 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 00:23:47 -0700 Subject: [PATCH 136/163] fix --- .../ics/amber/engine/architecture/worker/DataProcessor.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index 240ccb5719b..bd2f1c5d0ca 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -101,7 +101,6 @@ class DataProcessor( } } catch safely { case e => - // forward input tuple to the user and pause DP thread handleExecutorException(e) } } @@ -119,7 +118,6 @@ class DataProcessor( } } catch safely { case e => - // forward input tuple to the user and pause DP thread handleExecutorException(e) } } From a1ead8f04a910d58d46dfcd81c1c32c45e42cefe Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 00:25:35 -0700 Subject: [PATCH 137/163] fix --- .../ics/amber/engine/architecture/worker/DataProcessor.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index bd2f1c5d0ca..2fdf8e7c142 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -106,7 +106,7 @@ class DataProcessor( } /** - * process start of an input port with Executor.onStart(). + * process start of an input port with Executor.produceStateOnStart(). * this function is only called by the DP thread. */ private[this] def processStartOfUpstream(portId: Int): Unit = { From 48006200947219d910dc2fd84511af199fb860e0 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 00:26:05 -0700 Subject: [PATCH 138/163] fix --- .../ics/amber/engine/architecture/worker/DataProcessor.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index 2fdf8e7c142..e9e25eed014 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -123,7 +123,7 @@ class DataProcessor( } /** - * process end of an input port with Executor.onFinish(). + * process end of an input port with Executor.produceStateOnFinish(). * this function is only called by the DP thread. */ private[this] def processEndOfUpstream(portId: Int): Unit = { From 64b28f2305f13fe03661a04f35a2c611960f847f Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 02:51:17 -0700 Subject: [PATCH 139/163] update --- .../amber/engine/architecture/worker/DataProcessorSpec.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala index 57eaeed1857..62c159d9504 100644 --- a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala +++ b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala @@ -171,9 +171,7 @@ class DataProcessorSpec extends AnyFlatSpec with MockFactory with BeforeAndAfter dp.continueDataProcessing() } (adaptiveBatchingMonitor.stopAdaptiveBatching _).expects().once() - (executor.close _).expects().once() - dp.processDataPayload( ChannelIdentity(senderWorkerId, testWorkerId, isControl = false), MarkerFrame(EndOfUpstream()) From 13b4daeb63f0395379ebbd3a2689907842ea0fdd Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 02:51:46 -0700 Subject: [PATCH 140/163] update --- core/amber/src/main/python/pytexera/udf/udf_operator.py | 1 - 1 file changed, 1 deletion(-) diff --git a/core/amber/src/main/python/pytexera/udf/udf_operator.py b/core/amber/src/main/python/pytexera/udf/udf_operator.py index ef38f299e78..420cb8f7bfa 100644 --- a/core/amber/src/main/python/pytexera/udf/udf_operator.py +++ b/core/amber/src/main/python/pytexera/udf/udf_operator.py @@ -1,6 +1,5 @@ from abc import abstractmethod from typing import Iterator, Optional, Union - from pyamber import * From d4d2a480228955ce2be1f0ef6aa27f2f7491f5e6 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 02:52:37 -0700 Subject: [PATCH 141/163] update --- .../core/architecture/managers/tuple_processing_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py index 574644a2800..3529e3fb629 100644 --- a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py +++ b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py @@ -27,4 +27,4 @@ def get_input_port_id(self) -> int: # no upstream, special case for source executor. if port_id is None: return 0 - return port_id.id \ No newline at end of file + return port_id.id From 38e57cc8764e5d589ea6f8f8a9a63ba46881749c Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 03:16:08 -0700 Subject: [PATCH 142/163] update --- .../core/architecture/managers/tuple_processing_manager.py | 1 - 1 file changed, 1 deletion(-) diff --git a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py index 3529e3fb629..c217d5fe372 100644 --- a/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py +++ b/core/amber/src/main/python/core/architecture/managers/tuple_processing_manager.py @@ -1,7 +1,6 @@ from threading import Event, Condition from typing import Optional, Tuple, Iterator -from core.models.marker import State from proto.edu.uci.ics.amber.engine.common import PortIdentity From f5b02cf1145b00b76217c49a2f261197c8ab2d10 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 03:38:32 -0700 Subject: [PATCH 143/163] update --- .../python/core/runnables/network_receiver.py | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/core/amber/src/main/python/core/runnables/network_receiver.py b/core/amber/src/main/python/core/runnables/network_receiver.py index e0463dcb5f1..351f07ae517 100644 --- a/core/amber/src/main/python/core/runnables/network_receiver.py +++ b/core/amber/src/main/python/core/runnables/network_receiver.py @@ -64,20 +64,19 @@ def data_handler(command: bytes, table: Table) -> int: """ data_header = PythonDataHeader().parse(command) - payload_map = { - "Data": lambda table: DataFrame(table), - "State": lambda table: MarkerFrame(State(table)), - "StartOfUpstream": lambda _: MarkerFrame(StartOfUpstream()), - "EndOfUpstream": lambda _: MarkerFrame(EndOfUpstream()), - } - - shared_queue.put( - DataElement( - tag=data_header.tag, - payload=payload_map[data_header.payload_type](table), - ) - ) - + match data_header.payload_type: + case "Data": + payload = DataFrame(table) + case "State": + payload = MarkerFrame(State(table)) + case "StartOfUpstream": + payload = MarkerFrame(StartOfUpstream()) + case "EndOfUpstream": + payload = MarkerFrame(EndOfUpstream()) + case _: + raise NotImplementedError() + + shared_queue.put(DataElement(tag=data_header.tag, payload=payload)) return shared_queue.in_mem_size() self._proxy_server.register_data_handler(data_handler) From dbe12a050aae260a831d8014a519e80ce4fbb170 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 03:45:09 -0700 Subject: [PATCH 144/163] update --- .../pythonworker/PythonProxyServer.scala | 34 +++++++++---------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala index 081af0dd4bd..6da5eecaf58 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala @@ -103,26 +103,24 @@ private class AmberProducer( // closing the stream will release the dictionaries flightStream.takeDictionaryOwnership - if (dataHeader.payloadType == StartOfUpstream().getClass.getSimpleName) { - assert(root.getRowCount == 0) - outputPort.sendTo(to, MarkerFrame(StartOfUpstream())) - } else if (dataHeader.payloadType == EndOfUpstream().getClass.getSimpleName) { - assert(root.getRowCount == 0) - outputPort.sendTo(to, MarkerFrame(EndOfUpstream())) - } else if (dataHeader.payloadType == State().getClass.getSimpleName) { - assert(root.getRowCount == 1) - outputPort.sendTo(to, MarkerFrame(State(Some(ArrowUtils.getTexeraTuple(0, root))))) - } else { - // normal data batches - val queue = mutable.Queue[Tuple]() - for (i <- 0 until root.getRowCount) - queue.enqueue(ArrowUtils.getTexeraTuple(i, root)) - outputPort.sendTo(to, DataFrame(queue.toArray)) - + dataHeader.payloadType match { + case "StartOfUpstream" => + assert(root.getRowCount == 0) + outputPort.sendTo(to, MarkerFrame(StartOfUpstream())) + case "EndOfUpstream" => + assert(root.getRowCount == 0) + outputPort.sendTo(to, MarkerFrame(EndOfUpstream())) + case "State" => + assert(root.getRowCount == 1) + outputPort.sendTo(to, MarkerFrame(State(Some(ArrowUtils.getTexeraTuple(0, root)))) + ) + case _ => // normal data batches + val queue = mutable.Queue[Tuple]() + for (i <- 0 until root.getRowCount) + queue.enqueue(ArrowUtils.getTexeraTuple(i, root)) + outputPort.sendTo(to, DataFrame(queue.toArray)) } - } - } class PythonProxyServer( From 6903721f078e5203578bbc3418d26b84e16723e0 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 03:45:30 -0700 Subject: [PATCH 145/163] update --- .../pythonworker/PythonProxyServer.scala | 29 +++++++++---------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala index 6da5eecaf58..c829c6d8294 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala @@ -104,21 +104,20 @@ private class AmberProducer( flightStream.takeDictionaryOwnership dataHeader.payloadType match { - case "StartOfUpstream" => - assert(root.getRowCount == 0) - outputPort.sendTo(to, MarkerFrame(StartOfUpstream())) - case "EndOfUpstream" => - assert(root.getRowCount == 0) - outputPort.sendTo(to, MarkerFrame(EndOfUpstream())) - case "State" => - assert(root.getRowCount == 1) - outputPort.sendTo(to, MarkerFrame(State(Some(ArrowUtils.getTexeraTuple(0, root)))) - ) - case _ => // normal data batches - val queue = mutable.Queue[Tuple]() - for (i <- 0 until root.getRowCount) - queue.enqueue(ArrowUtils.getTexeraTuple(i, root)) - outputPort.sendTo(to, DataFrame(queue.toArray)) + case "StartOfUpstream" => + assert(root.getRowCount == 0) + outputPort.sendTo(to, MarkerFrame(StartOfUpstream())) + case "EndOfUpstream" => + assert(root.getRowCount == 0) + outputPort.sendTo(to, MarkerFrame(EndOfUpstream())) + case "State" => + assert(root.getRowCount == 1) + outputPort.sendTo(to, MarkerFrame(State(Some(ArrowUtils.getTexeraTuple(0, root))))) + case _ => // normal data batches + val queue = mutable.Queue[Tuple]() + for (i <- 0 until root.getRowCount) + queue.enqueue(ArrowUtils.getTexeraTuple(i, root)) + outputPort.sendTo(to, DataFrame(queue.toArray)) } } } From 4045d2f65d374e60b8b7b8ffe27ef94ceee7d8e8 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 20:47:30 -0700 Subject: [PATCH 146/163] update --- core/amber/src/main/python/core/models/operator.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/amber/src/main/python/core/models/operator.py b/core/amber/src/main/python/core/models/operator.py index db9916bff8d..ed04d5014f5 100644 --- a/core/amber/src/main/python/core/models/operator.py +++ b/core/amber/src/main/python/core/models/operator.py @@ -50,6 +50,8 @@ def close(self) -> None: def process_state(self, state: State, port: int) -> Optional[State]: """ Process an input State from the given link. + The default implementation is to pass the State to all downstream operators + if the State has pass_to_all_downstream set to True. :param state: State, a State from an input port to be processed. :param port: int, input port index of the current exhausted port. :return: State, producing one State object From c471697d6f0a926e8585e6bae651ab627863eca2 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 20:52:09 -0700 Subject: [PATCH 147/163] update --- .../python/core/architecture/packaging/input_manager.py | 4 ++-- core/amber/src/main/python/core/models/internal_marker.py | 4 +++- core/amber/src/main/python/core/runnables/main_loop.py | 8 ++++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index b58e489ff61..1b3faa1ad99 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -5,7 +5,7 @@ InternalMarker, StartOfOutputPorts, EndOfOutputPorts, - SenderChange, + SenderChange, EndOfInputPort, ) from core.models.marker import EndOfUpstream, State, StartOfUpstream, Marker from core.models.payload import DataFrame, DataPayload, MarkerFrame @@ -143,7 +143,7 @@ def _process_marker(self, marker: Marker) -> Iterator[InternalMarker]: ) if port_completed: - yield EndOfUpstream() # EndOfInputPort() + yield EndOfInputPort() all_ports_completed = all( map(lambda port: port.is_completed(), self._ports.values()) diff --git a/core/amber/src/main/python/core/models/internal_marker.py b/core/amber/src/main/python/core/models/internal_marker.py index 7063c4fdff8..27e0f2ef349 100644 --- a/core/amber/src/main/python/core/models/internal_marker.py +++ b/core/amber/src/main/python/core/models/internal_marker.py @@ -18,12 +18,14 @@ class InternalMarker: class SenderChange(InternalMarker): channel_id: ChannelIdentity +@dataclass +class EndOfInputPort(InternalMarker): + pass @dataclass class StartOfOutputPorts(InternalMarker): pass - @dataclass class EndOfOutputPorts(InternalMarker): pass diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index 14d3f76e3c4..ab05a37b5e2 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -17,7 +17,7 @@ SenderChange, Tuple, ) -from core.models.internal_marker import StartOfOutputPorts +from core.models.internal_marker import StartOfOutputPorts, EndOfInputPort from core.models.internal_queue import DataElement, ControlElement from core.models.marker import State, EndOfUpstream, StartOfUpstream from core.runnables.data_processor import DataProcessor @@ -218,7 +218,7 @@ def _process_start_of_upstream(self, start_of_upstream: StartOfUpstream) -> None self.context.marker_processing_manager.current_input_marker = start_of_upstream self.process_input_state() - def _process_end_of_upstream(self, end_of_upstream: EndOfUpstream) -> None: + def _process_end_of_input_port(self, end_of_upstream: EndOfUpstream) -> None: self.context.marker_processing_manager.current_input_marker = end_of_upstream self.process_input_state() self.process_input_tuple() @@ -313,8 +313,8 @@ def _process_data_element(self, data_element: DataElement) -> None: self._process_tuple, StartOfUpstream, self._process_start_of_upstream, - EndOfUpstream, - self._process_end_of_upstream, + EndOfInputPort, + self._process_end_of_input_port, SenderChange, self._process_sender_change_marker, StartOfOutputPorts, From 18da3a5f95be8b2ec4058175a8adf22aae049a9d Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 20:53:03 -0700 Subject: [PATCH 148/163] Revert "update" This reverts commit c471697d6f0a926e8585e6bae651ab627863eca2. --- .../python/core/architecture/packaging/input_manager.py | 4 ++-- core/amber/src/main/python/core/models/internal_marker.py | 4 +--- core/amber/src/main/python/core/runnables/main_loop.py | 8 ++++---- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index 1b3faa1ad99..b58e489ff61 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -5,7 +5,7 @@ InternalMarker, StartOfOutputPorts, EndOfOutputPorts, - SenderChange, EndOfInputPort, + SenderChange, ) from core.models.marker import EndOfUpstream, State, StartOfUpstream, Marker from core.models.payload import DataFrame, DataPayload, MarkerFrame @@ -143,7 +143,7 @@ def _process_marker(self, marker: Marker) -> Iterator[InternalMarker]: ) if port_completed: - yield EndOfInputPort() + yield EndOfUpstream() # EndOfInputPort() all_ports_completed = all( map(lambda port: port.is_completed(), self._ports.values()) diff --git a/core/amber/src/main/python/core/models/internal_marker.py b/core/amber/src/main/python/core/models/internal_marker.py index 27e0f2ef349..7063c4fdff8 100644 --- a/core/amber/src/main/python/core/models/internal_marker.py +++ b/core/amber/src/main/python/core/models/internal_marker.py @@ -18,14 +18,12 @@ class InternalMarker: class SenderChange(InternalMarker): channel_id: ChannelIdentity -@dataclass -class EndOfInputPort(InternalMarker): - pass @dataclass class StartOfOutputPorts(InternalMarker): pass + @dataclass class EndOfOutputPorts(InternalMarker): pass diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index ab05a37b5e2..14d3f76e3c4 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -17,7 +17,7 @@ SenderChange, Tuple, ) -from core.models.internal_marker import StartOfOutputPorts, EndOfInputPort +from core.models.internal_marker import StartOfOutputPorts from core.models.internal_queue import DataElement, ControlElement from core.models.marker import State, EndOfUpstream, StartOfUpstream from core.runnables.data_processor import DataProcessor @@ -218,7 +218,7 @@ def _process_start_of_upstream(self, start_of_upstream: StartOfUpstream) -> None self.context.marker_processing_manager.current_input_marker = start_of_upstream self.process_input_state() - def _process_end_of_input_port(self, end_of_upstream: EndOfUpstream) -> None: + def _process_end_of_upstream(self, end_of_upstream: EndOfUpstream) -> None: self.context.marker_processing_manager.current_input_marker = end_of_upstream self.process_input_state() self.process_input_tuple() @@ -313,8 +313,8 @@ def _process_data_element(self, data_element: DataElement) -> None: self._process_tuple, StartOfUpstream, self._process_start_of_upstream, - EndOfInputPort, - self._process_end_of_input_port, + EndOfUpstream, + self._process_end_of_upstream, SenderChange, self._process_sender_change_marker, StartOfOutputPorts, From c60e606c936ecb6a75715e622747335b22105449 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 21:18:45 -0700 Subject: [PATCH 149/163] update --- .../architecture/packaging/input_manager.py | 5 +++-- .../main/python/core/models/internal_marker.py | 14 +++++++++++--- .../src/main/python/core/models/marker.py | 7 ++++--- .../python/core/runnables/data_processor.py | 7 ++++--- .../main/python/core/runnables/main_loop.py | 18 +++++++++--------- 5 files changed, 31 insertions(+), 20 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index b58e489ff61..78bff2cf606 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -6,6 +6,7 @@ StartOfOutputPorts, EndOfOutputPorts, SenderChange, + EndOfInputPort, StartOfInputPort, ) from core.models.marker import EndOfUpstream, State, StartOfUpstream, Marker from core.models.payload import DataFrame, DataPayload, MarkerFrame @@ -130,7 +131,7 @@ def _process_marker(self, marker: Marker) -> Iterator[InternalMarker]: if not self.started: yield StartOfOutputPorts() self.started = True - yield StartOfUpstream() # StartOfInputChannel() + yield StartOfInputPort() if isinstance(marker, EndOfUpstream): # EndOfInputChannel() channel = self._channels[self._current_channel_id] channel.complete() @@ -143,7 +144,7 @@ def _process_marker(self, marker: Marker) -> Iterator[InternalMarker]: ) if port_completed: - yield EndOfUpstream() # EndOfInputPort() + yield EndOfInputPort() all_ports_completed = all( map(lambda port: port.is_completed(), self._ports.values()) diff --git a/core/amber/src/main/python/core/models/internal_marker.py b/core/amber/src/main/python/core/models/internal_marker.py index 7063c4fdff8..bdcafeeea0d 100644 --- a/core/amber/src/main/python/core/models/internal_marker.py +++ b/core/amber/src/main/python/core/models/internal_marker.py @@ -1,11 +1,10 @@ from dataclasses import dataclass - - +from core.models.marker import Marker from proto.edu.uci.ics.amber.engine.common import ChannelIdentity @dataclass -class InternalMarker: +class InternalMarker(Marker): """ A special Data Message, only being generated in un-packaging a batch into Tuples. Markers retain the order information and served as a indicator of data state. @@ -19,6 +18,15 @@ class SenderChange(InternalMarker): channel_id: ChannelIdentity +@dataclass +class StartOfInputPort(InternalMarker): + pass + +@dataclass +class EndOfInputPort(InternalMarker): + pass + + @dataclass class StartOfOutputPorts(InternalMarker): pass diff --git a/core/amber/src/main/python/core/models/marker.py b/core/amber/src/main/python/core/models/marker.py index 04ec2e99de7..e86954dc087 100644 --- a/core/amber/src/main/python/core/models/marker.py +++ b/core/amber/src/main/python/core/models/marker.py @@ -1,7 +1,8 @@ from dataclasses import dataclass -from pyarrow import Table from pandas import DataFrame +from pyarrow import Table from typing import Optional + from .schema import Schema, AttributeType from .schema.attribute_type import FROM_PYOBJECT_MAPPING @@ -24,7 +25,7 @@ class EndOfUpstream(Marker): @dataclass class State(Marker): def __init__( - self, table: Optional[Table] = None, pass_to_all_downstream: bool = False + self, table: Optional[Table] = None, pass_to_all_downstream: bool = False ): if table is None: self.data = {} @@ -35,7 +36,7 @@ def __init__( self.schema = Schema(table.schema) def add( - self, key: str, value: any, value_type: Optional[AttributeType] = None + self, key: str, value: any, value_type: Optional[AttributeType] = None ) -> None: self.data[key] = value if value_type is not None: diff --git a/core/amber/src/main/python/core/runnables/data_processor.py b/core/amber/src/main/python/core/runnables/data_processor.py index 454d17ba636..922061c5381 100644 --- a/core/amber/src/main/python/core/runnables/data_processor.py +++ b/core/amber/src/main/python/core/runnables/data_processor.py @@ -7,7 +7,8 @@ from typing import Iterator, Optional from core.architecture.managers import Context from core.models import ExceptionInfo, State, TupleLike -from core.models.marker import Marker, StartOfUpstream, EndOfUpstream +from core.models.internal_marker import StartOfInputPort, EndOfInputPort +from core.models.marker import Marker from core.models.table import all_output_to_tuple from core.util import Stoppable from core.util.console_message.replace_print import replace_print @@ -48,11 +49,11 @@ def process_marker(self, marker: Marker) -> None: self._context.worker_id, self._context.console_message_manager.print_buf, ): - if isinstance(marker, StartOfUpstream): + if isinstance(marker, StartOfInputPort): self._set_output_state(executor.produce_state_on_start(port_id)) elif isinstance(marker, State): self._set_output_state(executor.process_state(marker, port_id)) - elif isinstance(marker, EndOfUpstream): + elif isinstance(marker, EndOfInputPort): self._set_output_state(executor.produce_state_on_finish(port_id)) self._set_output_tuple(executor.on_finish(port_id)) diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index 14d3f76e3c4..0b4976bd33f 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -17,7 +17,7 @@ SenderChange, Tuple, ) -from core.models.internal_marker import StartOfOutputPorts +from core.models.internal_marker import StartOfOutputPorts, EndOfInputPort, StartOfInputPort from core.models.internal_queue import DataElement, ControlElement from core.models.marker import State, EndOfUpstream, StartOfUpstream from core.runnables.data_processor import DataProcessor @@ -214,12 +214,12 @@ def _process_state(self, state_: State) -> None: self.process_input_state() self._check_and_process_control() - def _process_start_of_upstream(self, start_of_upstream: StartOfUpstream) -> None: - self.context.marker_processing_manager.current_input_marker = start_of_upstream + def _process_start_of_input_port(self, start_of_input_port: StartOfInputPort) -> None: + self.context.marker_processing_manager.current_input_marker = start_of_input_port self.process_input_state() - def _process_end_of_upstream(self, end_of_upstream: EndOfUpstream) -> None: - self.context.marker_processing_manager.current_input_marker = end_of_upstream + def _process_end_of_input_port(self, end_of_input_port: EndOfInputPort) -> None: + self.context.marker_processing_manager.current_input_marker = end_of_input_port self.process_input_state() self.process_input_tuple() if self.context.tuple_processing_manager.current_input_port_id is not None: @@ -311,10 +311,10 @@ def _process_data_element(self, data_element: DataElement) -> None: element, Tuple, self._process_tuple, - StartOfUpstream, - self._process_start_of_upstream, - EndOfUpstream, - self._process_end_of_upstream, + StartOfInputPort, + self._process_start_of_input_port, + EndOfInputPort, + self._process_end_of_input_port, SenderChange, self._process_sender_change_marker, StartOfOutputPorts, From 96230f787da0e6d044543e15f441ebe5bd0a9d41 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 22:01:42 -0700 Subject: [PATCH 150/163] rename StartOfUpstream --- .../core/architecture/packaging/input_manager.py | 4 ++-- core/amber/src/main/python/core/models/marker.py | 2 +- core/amber/src/main/python/core/runnables/main_loop.py | 6 +++--- .../src/main/python/core/runnables/network_receiver.py | 6 +++--- .../architecture/pythonworker/PythonProxyServer.scala | 6 +++--- .../engine/architecture/worker/DataProcessor.scala | 10 +++++----- .../worker/promisehandlers/StartHandler.scala | 4 ++-- .../edu/uci/ics/texera/workflow/common/Marker.scala | 2 +- 8 files changed, 20 insertions(+), 20 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index 78bff2cf606..b6592e95746 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -8,7 +8,7 @@ SenderChange, EndOfInputPort, StartOfInputPort, ) -from core.models.marker import EndOfUpstream, State, StartOfUpstream, Marker +from core.models.marker import EndOfUpstream, State, StartOfInputChannel, Marker from core.models.payload import DataFrame, DataPayload, MarkerFrame from proto.edu.uci.ics.amber.engine.common import ( ActorVirtualIdentity, @@ -127,7 +127,7 @@ def _process_data(self, table: Table) -> Iterator[Tuple]: def _process_marker(self, marker: Marker) -> Iterator[InternalMarker]: if isinstance(marker, State): yield marker - if isinstance(marker, StartOfUpstream): # StartOfInputChannel() + if isinstance(marker, StartOfInputChannel): # StartOfInputChannel() if not self.started: yield StartOfOutputPorts() self.started = True diff --git a/core/amber/src/main/python/core/models/marker.py b/core/amber/src/main/python/core/models/marker.py index e86954dc087..372cf2acc9e 100644 --- a/core/amber/src/main/python/core/models/marker.py +++ b/core/amber/src/main/python/core/models/marker.py @@ -13,7 +13,7 @@ class Marker: @dataclass -class StartOfUpstream(Marker): +class StartOfInputChannel(Marker): pass diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index 0b4976bd33f..95e3ccbc502 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -19,7 +19,7 @@ ) from core.models.internal_marker import StartOfOutputPorts, EndOfInputPort, StartOfInputPort from core.models.internal_queue import DataElement, ControlElement -from core.models.marker import State, EndOfUpstream, StartOfUpstream +from core.models.marker import State, EndOfUpstream, StartOfInputChannel from core.runnables.data_processor import DataProcessor from core.util import StoppableQueueBlockingRunnable, get_one_of, set_one_of from core.util.customized_queue.queue_base import QueueElement @@ -249,11 +249,11 @@ def _process_start_of_output_ports(self, _: StartOfOutputPorts) -> None: """ Upon receipt of an StartOfAllMarker, which indicates the start of any input links, - send the StartOfUpstream to all downstream workers. + send the StartOfInputChannel to all downstream workers. :param _: StartOfAny Internal Marker """ - for to, batch in self.context.output_manager.emit_marker(StartOfUpstream()): + for to, batch in self.context.output_manager.emit_marker(StartOfInputChannel()): self._output_queue.put(DataElement(tag=to, payload=batch)) self._check_and_process_control() diff --git a/core/amber/src/main/python/core/runnables/network_receiver.py b/core/amber/src/main/python/core/runnables/network_receiver.py index 351f07ae517..fc9ba05dace 100644 --- a/core/amber/src/main/python/core/runnables/network_receiver.py +++ b/core/amber/src/main/python/core/runnables/network_receiver.py @@ -17,7 +17,7 @@ MarkerFrame, ) from core.models.internal_queue import DataElement, ControlElement, InternalQueue -from core.models.marker import EndOfUpstream, State, StartOfUpstream +from core.models.marker import EndOfUpstream, State, StartOfInputChannel from core.proxy import ProxyServer from core.util import Stoppable, get_one_of from core.util.runnable.runnable import Runnable @@ -69,8 +69,8 @@ def data_handler(command: bytes, table: Table) -> int: payload = DataFrame(table) case "State": payload = MarkerFrame(State(table)) - case "StartOfUpstream": - payload = MarkerFrame(StartOfUpstream()) + case "StartOfInputChannel": + payload = MarkerFrame(StartOfInputChannel()) case "EndOfUpstream": payload = MarkerFrame(EndOfUpstream()) case _: diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala index c829c6d8294..b65f00f9dad 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala @@ -20,7 +20,7 @@ import java.net.ServerSocket import java.util.concurrent.atomic.AtomicInteger import scala.collection.mutable import com.twitter.util.Promise -import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfUpstream, State} +import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfInputChannel, State} import java.nio.charset.Charset @@ -104,9 +104,9 @@ private class AmberProducer( flightStream.takeDictionaryOwnership dataHeader.payloadType match { - case "StartOfUpstream" => + case "StartOfInputChannel" => assert(root.getRowCount == 0) - outputPort.sendTo(to, MarkerFrame(StartOfUpstream())) + outputPort.sendTo(to, MarkerFrame(StartOfInputChannel())) case "EndOfUpstream" => assert(root.getRowCount == 0) outputPort.sendTo(to, MarkerFrame(EndOfUpstream())) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index e9e25eed014..bd95fcc96ad 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -33,7 +33,7 @@ import edu.uci.ics.amber.engine.common.virtualidentity.util.{CONTROLLER, SELF} import edu.uci.ics.amber.engine.common.virtualidentity.{ActorVirtualIdentity, ChannelIdentity} import edu.uci.ics.amber.engine.common.workflow.PortIdentity import edu.uci.ics.amber.error.ErrorUtils.{mkConsoleMessage, safely} -import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfUpstream, State} +import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfInputChannel, State} import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple @@ -109,9 +109,9 @@ class DataProcessor( * process start of an input port with Executor.produceStateOnStart(). * this function is only called by the DP thread. */ - private[this] def processStartOfUpstream(portId: Int): Unit = { + private[this] def processStartOfInputChannel(portId: Int): Unit = { try { - outputManager.emitMarker(StartOfUpstream()) + outputManager.emitMarker(StartOfInputChannel()) val outputState = executor.produceStateOnStart(portId) if (outputState.isDefined) { outputManager.emitMarker(outputState.get) @@ -226,8 +226,8 @@ class DataProcessor( marker match { case state: State => processInputState(state, portId.id) - case StartOfUpstream() => - processStartOfUpstream(portId.id) + case StartOfInputChannel() => + processStartOfInputChannel(portId.id) case EndOfUpstream() => this.inputManager.getPort(portId).channels(channelId) = true if (inputManager.isPortCompleted(portId)) { diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala index 6d8020d1209..d6854fb1643 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala @@ -11,7 +11,7 @@ import edu.uci.ics.amber.engine.common.rpc.AsyncRPCServer.ControlCommand import edu.uci.ics.amber.engine.common.virtualidentity.ChannelIdentity import edu.uci.ics.amber.engine.common.virtualidentity.util.SOURCE_STARTER_ACTOR import edu.uci.ics.amber.engine.common.workflow.PortIdentity -import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfUpstream} +import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfInputChannel} object StartHandler { final case class StartWorker() extends ControlCommand[WorkerState] @@ -33,7 +33,7 @@ trait StartHandler { .setPortId(dummyInputPortId) dp.processDataPayload( ChannelIdentity(SOURCE_STARTER_ACTOR, dp.actorId, isControl = false), - MarkerFrame(StartOfUpstream()) + MarkerFrame(StartOfInputChannel()) ) dp.processDataPayload( ChannelIdentity(SOURCE_STARTER_ACTOR, dp.actorId, isControl = false), diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index 403afc485b3..4119aff066a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -6,7 +6,7 @@ import scala.collection.mutable sealed trait Marker -final case class StartOfUpstream() extends Marker +final case class StartOfInputChannel() extends Marker final case class EndOfUpstream() extends Marker final case class State(tuple: Option[Tuple] = None, passToAllDownstream: Boolean = false) From c5632a3a4e0f01d6d4f7640cbcb9afd69b8b1e34 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 22:04:00 -0700 Subject: [PATCH 151/163] rename EndOfUpstream --- .../core/architecture/packaging/input_manager.py | 6 +++--- core/amber/src/main/python/core/models/marker.py | 2 +- core/amber/src/main/python/core/runnables/main_loop.py | 4 ++-- .../src/main/python/core/runnables/network_receiver.py | 6 +++--- .../src/main/python/core/runnables/network_sender.py | 2 +- .../src/main/python/core/runnables/test_main_loop.py | 8 ++++---- .../python/core/runnables/test_network_receiver.py | 6 +++--- .../architecture/pythonworker/PythonProxyServer.scala | 6 +++--- .../engine/architecture/worker/DataProcessor.scala | 10 +++++----- .../worker/promisehandlers/StartHandler.scala | 4 ++-- .../edu/uci/ics/texera/workflow/common/Marker.scala | 2 +- .../messaginglayer/OutputManagerSpec.scala | 6 +++--- .../engine/architecture/worker/DataProcessorSpec.scala | 6 +++--- 13 files changed, 34 insertions(+), 34 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index b6592e95746..8aad9e82694 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -8,7 +8,7 @@ SenderChange, EndOfInputPort, StartOfInputPort, ) -from core.models.marker import EndOfUpstream, State, StartOfInputChannel, Marker +from core.models.marker import EndOfInputChannel, State, StartOfInputChannel, Marker from core.models.payload import DataFrame, DataPayload, MarkerFrame from proto.edu.uci.ics.amber.engine.common import ( ActorVirtualIdentity, @@ -88,7 +88,7 @@ def process_data_payload( ) -> Iterator[Union[Tuple, InternalMarker]]: # special case used to yield for source op if from_ == InputManager.SOURCE_STARTER: - yield EndOfUpstream() + yield EndOfInputChannel() yield EndOfOutputPorts() return @@ -132,7 +132,7 @@ def _process_marker(self, marker: Marker) -> Iterator[InternalMarker]: yield StartOfOutputPorts() self.started = True yield StartOfInputPort() - if isinstance(marker, EndOfUpstream): # EndOfInputChannel() + if isinstance(marker, EndOfInputChannel): # EndOfInputChannel() channel = self._channels[self._current_channel_id] channel.complete() port_id = channel.port_id diff --git a/core/amber/src/main/python/core/models/marker.py b/core/amber/src/main/python/core/models/marker.py index 372cf2acc9e..60e3a86bb9c 100644 --- a/core/amber/src/main/python/core/models/marker.py +++ b/core/amber/src/main/python/core/models/marker.py @@ -18,7 +18,7 @@ class StartOfInputChannel(Marker): @dataclass -class EndOfUpstream(Marker): +class EndOfInputChannel(Marker): pass diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index 95e3ccbc502..81aac3079c6 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -19,7 +19,7 @@ ) from core.models.internal_marker import StartOfOutputPorts, EndOfInputPort, StartOfInputPort from core.models.internal_queue import DataElement, ControlElement -from core.models.marker import State, EndOfUpstream, StartOfInputChannel +from core.models.marker import State, EndOfInputChannel, StartOfInputChannel from core.runnables.data_processor import DataProcessor from core.util import StoppableQueueBlockingRunnable, get_one_of, set_one_of from core.util.customized_queue.queue_base import QueueElement @@ -266,7 +266,7 @@ def _process_end_of_output_ports(self, _: EndOfOutputPorts) -> None: :param _: EndOfOutputPorts """ - for to, batch in self.context.output_manager.emit_marker(EndOfUpstream()): + for to, batch in self.context.output_manager.emit_marker(EndOfInputChannel()): self._output_queue.put(DataElement(tag=to, payload=batch)) self._check_and_process_control() control_command = set_one_of( diff --git a/core/amber/src/main/python/core/runnables/network_receiver.py b/core/amber/src/main/python/core/runnables/network_receiver.py index fc9ba05dace..98e90df12e2 100644 --- a/core/amber/src/main/python/core/runnables/network_receiver.py +++ b/core/amber/src/main/python/core/runnables/network_receiver.py @@ -17,7 +17,7 @@ MarkerFrame, ) from core.models.internal_queue import DataElement, ControlElement, InternalQueue -from core.models.marker import EndOfUpstream, State, StartOfInputChannel +from core.models.marker import EndOfInputChannel, State, StartOfInputChannel from core.proxy import ProxyServer from core.util import Stoppable, get_one_of from core.util.runnable.runnable import Runnable @@ -71,8 +71,8 @@ def data_handler(command: bytes, table: Table) -> int: payload = MarkerFrame(State(table)) case "StartOfInputChannel": payload = MarkerFrame(StartOfInputChannel()) - case "EndOfUpstream": - payload = MarkerFrame(EndOfUpstream()) + case "EndOfInputChannel": + payload = MarkerFrame(EndOfInputChannel()) case _: raise NotImplementedError() diff --git a/core/amber/src/main/python/core/runnables/network_sender.py b/core/amber/src/main/python/core/runnables/network_sender.py index b8c458192a9..031f2783902 100644 --- a/core/amber/src/main/python/core/runnables/network_sender.py +++ b/core/amber/src/main/python/core/runnables/network_sender.py @@ -50,7 +50,7 @@ def _send_data(self, to: ActorVirtualIdentity, data_payload: DataPayload) -> Non :param to: The target actor's ActorVirtualIdentity :param data_payload: The data payload to be sent, can be either DataFrame or - EndOfUpstream + EndOfInputChannel """ if isinstance(data_payload, DataFrame): diff --git a/core/amber/src/main/python/core/runnables/test_main_loop.py b/core/amber/src/main/python/core/runnables/test_main_loop.py index 57d73058233..bb342601df6 100644 --- a/core/amber/src/main/python/core/runnables/test_main_loop.py +++ b/core/amber/src/main/python/core/runnables/test_main_loop.py @@ -13,7 +13,7 @@ Tuple, ) from core.models.internal_queue import DataElement, ControlElement -from core.models.marker import EndOfUpstream +from core.models.marker import EndOfInputChannel from core.runnables import MainLoop from core.util import set_one_of from proto.edu.uci.ics.amber.engine.architecture.sendsemantics import ( @@ -135,7 +135,7 @@ def mock_batch_data_elements(self, mock_batch, mock_sender_actor): @pytest.fixture def mock_end_of_upstream(self, mock_tuple, mock_sender_actor): - return DataElement(tag=mock_sender_actor, payload=MarkerFrame(EndOfUpstream())) + return DataElement(tag=mock_sender_actor, payload=MarkerFrame(EndOfInputChannel())) @pytest.fixture def input_queue(self): @@ -533,7 +533,7 @@ def test_main_loop_thread_can_process_messages( ), ) - # can process EndOfUpstream + # can process EndOfInputChannel input_queue.put(mock_end_of_upstream) # the input port should complete @@ -580,7 +580,7 @@ def test_main_loop_thread_can_process_messages( ) assert output_queue.get() == DataElement( - tag=mock_receiver_actor, payload=MarkerFrame(EndOfUpstream()) + tag=mock_receiver_actor, payload=MarkerFrame(EndOfInputChannel()) ) # can process ReturnInvocation diff --git a/core/amber/src/main/python/core/runnables/test_network_receiver.py b/core/amber/src/main/python/core/runnables/test_network_receiver.py index 5728e35f8d5..ec682237935 100644 --- a/core/amber/src/main/python/core/runnables/test_network_receiver.py +++ b/core/amber/src/main/python/core/runnables/test_network_receiver.py @@ -4,7 +4,7 @@ from pyarrow import Table from core.models.internal_queue import InternalQueue, ControlElement, DataElement -from core.models.marker import EndOfUpstream +from core.models.marker import EndOfInputChannel from core.models.payload import MarkerFrame, DataFrame from core.proxy import ProxyClient from core.runnables.network_receiver import NetworkReceiver @@ -121,11 +121,11 @@ def test_network_receiver_can_receive_data_messages_end_of_upstream( network_sender_thread.start() worker_id = ActorVirtualIdentity(name="test") input_queue.put( - DataElement(tag=worker_id, payload=MarkerFrame(EndOfUpstream())) + DataElement(tag=worker_id, payload=MarkerFrame(EndOfInputChannel())) ) element: DataElement = output_queue.get() assert isinstance(element.payload, MarkerFrame) - assert element.payload.frame == EndOfUpstream() + assert element.payload.frame == EndOfInputChannel() assert element.tag == worker_id @pytest.mark.timeout(2) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala index b65f00f9dad..6ac9291283d 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/pythonworker/PythonProxyServer.scala @@ -20,7 +20,7 @@ import java.net.ServerSocket import java.util.concurrent.atomic.AtomicInteger import scala.collection.mutable import com.twitter.util.Promise -import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfInputChannel, State} +import edu.uci.ics.texera.workflow.common.{EndOfInputChannel, StartOfInputChannel, State} import java.nio.charset.Charset @@ -107,9 +107,9 @@ private class AmberProducer( case "StartOfInputChannel" => assert(root.getRowCount == 0) outputPort.sendTo(to, MarkerFrame(StartOfInputChannel())) - case "EndOfUpstream" => + case "EndOfInputChannel" => assert(root.getRowCount == 0) - outputPort.sendTo(to, MarkerFrame(EndOfUpstream())) + outputPort.sendTo(to, MarkerFrame(EndOfInputChannel())) case "State" => assert(root.getRowCount == 1) outputPort.sendTo(to, MarkerFrame(State(Some(ArrowUtils.getTexeraTuple(0, root))))) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index bd95fcc96ad..1c93b19fd6a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -33,7 +33,7 @@ import edu.uci.ics.amber.engine.common.virtualidentity.util.{CONTROLLER, SELF} import edu.uci.ics.amber.engine.common.virtualidentity.{ActorVirtualIdentity, ChannelIdentity} import edu.uci.ics.amber.engine.common.workflow.PortIdentity import edu.uci.ics.amber.error.ErrorUtils.{mkConsoleMessage, safely} -import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfInputChannel, State} +import edu.uci.ics.texera.workflow.common.{EndOfInputChannel, StartOfInputChannel, State} import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple @@ -126,7 +126,7 @@ class DataProcessor( * process end of an input port with Executor.produceStateOnFinish(). * this function is only called by the DP thread. */ - private[this] def processEndOfUpstream(portId: Int): Unit = { + private[this] def processEndOfInputChannel(portId: Int): Unit = { try { val outputState = executor.produceStateOnFinish(portId) if (outputState != null && outputState.isDefined) { @@ -167,7 +167,7 @@ class DataProcessor( outputTuple match { case FinalizeExecutor() => - outputManager.emitMarker(EndOfUpstream()) + outputManager.emitMarker(EndOfInputChannel()) // Send Completed signal to worker actor. executor.close() adaptiveBatchingMonitor.stopAdaptiveBatching() @@ -228,11 +228,11 @@ class DataProcessor( processInputState(state, portId.id) case StartOfInputChannel() => processStartOfInputChannel(portId.id) - case EndOfUpstream() => + case EndOfInputChannel() => this.inputManager.getPort(portId).channels(channelId) = true if (inputManager.isPortCompleted(portId)) { inputManager.initBatch(channelId, Array.empty) - processEndOfUpstream(portId.id) + processEndOfInputChannel(portId.id) outputManager.outputIterator.appendSpecialTupleToEnd( FinalizePort(portId, input = true) ) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala index d6854fb1643..431705c7c69 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/promisehandlers/StartHandler.scala @@ -11,7 +11,7 @@ import edu.uci.ics.amber.engine.common.rpc.AsyncRPCServer.ControlCommand import edu.uci.ics.amber.engine.common.virtualidentity.ChannelIdentity import edu.uci.ics.amber.engine.common.virtualidentity.util.SOURCE_STARTER_ACTOR import edu.uci.ics.amber.engine.common.workflow.PortIdentity -import edu.uci.ics.texera.workflow.common.{EndOfUpstream, StartOfInputChannel} +import edu.uci.ics.texera.workflow.common.{EndOfInputChannel, StartOfInputChannel} object StartHandler { final case class StartWorker() extends ControlCommand[WorkerState] @@ -37,7 +37,7 @@ trait StartHandler { ) dp.processDataPayload( ChannelIdentity(SOURCE_STARTER_ACTOR, dp.actorId, isControl = false), - MarkerFrame(EndOfUpstream()) + MarkerFrame(EndOfInputChannel()) ) dp.stateManager.getCurrentState } else { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index 4119aff066a..034a82fe386 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -7,7 +7,7 @@ import scala.collection.mutable sealed trait Marker final case class StartOfInputChannel() extends Marker -final case class EndOfUpstream() extends Marker +final case class EndOfInputChannel() extends Marker final case class State(tuple: Option[Tuple] = None, passToAllDownstream: Boolean = false) extends Marker { diff --git a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManagerSpec.scala b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManagerSpec.scala index 0ff4b539319..5cfdfbf71d3 100644 --- a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManagerSpec.scala +++ b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/messaginglayer/OutputManagerSpec.scala @@ -11,7 +11,7 @@ import edu.uci.ics.amber.engine.common.virtualidentity.{ PhysicalOpIdentity } import edu.uci.ics.amber.engine.common.workflow.{PhysicalLink, PortIdentity} -import edu.uci.ics.texera.workflow.common.EndOfUpstream +import edu.uci.ics.texera.workflow.common.EndOfInputChannel import edu.uci.ics.texera.workflow.common.tuple.schema.{AttributeType, Schema} import org.scalamock.scalatest.MockFactory import org.scalatest.flatspec.AnyFlatSpec @@ -67,7 +67,7 @@ class OutputManagerSpec extends AnyFlatSpec with MockFactory { mkDataMessage(fakeID, identifier, 2, DataFrame(tuples.slice(20, 21))) ) (mockHandler.apply _).expects( - mkDataMessage(fakeID, identifier, 3, MarkerFrame(EndOfUpstream())) + mkDataMessage(fakeID, identifier, 3, MarkerFrame(EndOfInputChannel())) ) } val fakeLink = PhysicalLink(physicalOpId(), mockPortId, physicalOpId(), mockPortId) @@ -81,7 +81,7 @@ class OutputManagerSpec extends AnyFlatSpec with MockFactory { tuples.foreach { t => outputManager.passTupleToDownstream(TupleLike(t.getFields), None) } - outputManager.emitMarker(EndOfUpstream()) + outputManager.emitMarker(EndOfInputChannel()) } } diff --git a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala index 62c159d9504..843e3e58b49 100644 --- a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala +++ b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala @@ -19,7 +19,7 @@ import edu.uci.ics.amber.engine.common.virtualidentity.{ PhysicalOpIdentity } import edu.uci.ics.amber.engine.common.workflow.PortIdentity -import edu.uci.ics.texera.workflow.common.EndOfUpstream +import edu.uci.ics.texera.workflow.common.EndOfInputChannel import edu.uci.ics.texera.workflow.common.WorkflowContext.DEFAULT_WORKFLOW_ID import edu.uci.ics.texera.workflow.common.operators.OperatorExecutor import edu.uci.ics.texera.workflow.common.tuple.Tuple @@ -114,7 +114,7 @@ class DataProcessorSpec extends AnyFlatSpec with MockFactory with BeforeAndAfter } dp.processDataPayload( ChannelIdentity(senderWorkerId, testWorkerId, isControl = false), - MarkerFrame(EndOfUpstream()) + MarkerFrame(EndOfInputChannel()) ) while (dp.inputManager.hasUnfinishedInput || dp.outputManager.hasUnfinishedOutput) { dp.continueDataProcessing() @@ -174,7 +174,7 @@ class DataProcessorSpec extends AnyFlatSpec with MockFactory with BeforeAndAfter (executor.close _).expects().once() dp.processDataPayload( ChannelIdentity(senderWorkerId, testWorkerId, isControl = false), - MarkerFrame(EndOfUpstream()) + MarkerFrame(EndOfInputChannel()) ) while (dp.inputManager.hasUnfinishedInput || dp.outputManager.hasUnfinishedOutput) { dp.continueDataProcessing() From 7e4d937df6bd98037a179085a91c4895850ad65e Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 22:09:53 -0700 Subject: [PATCH 152/163] fix fmt --- .../core/architecture/packaging/input_manager.py | 3 ++- .../src/main/python/core/models/internal_marker.py | 1 + core/amber/src/main/python/core/models/marker.py | 4 ++-- .../src/main/python/core/runnables/main_loop.py | 14 +++++++++++--- .../main/python/core/runnables/test_main_loop.py | 4 +++- 5 files changed, 19 insertions(+), 7 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index 8aad9e82694..0b553af2ea2 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -6,7 +6,8 @@ StartOfOutputPorts, EndOfOutputPorts, SenderChange, - EndOfInputPort, StartOfInputPort, + EndOfInputPort, + StartOfInputPort, ) from core.models.marker import EndOfInputChannel, State, StartOfInputChannel, Marker from core.models.payload import DataFrame, DataPayload, MarkerFrame diff --git a/core/amber/src/main/python/core/models/internal_marker.py b/core/amber/src/main/python/core/models/internal_marker.py index bdcafeeea0d..78ed5c60513 100644 --- a/core/amber/src/main/python/core/models/internal_marker.py +++ b/core/amber/src/main/python/core/models/internal_marker.py @@ -22,6 +22,7 @@ class SenderChange(InternalMarker): class StartOfInputPort(InternalMarker): pass + @dataclass class EndOfInputPort(InternalMarker): pass diff --git a/core/amber/src/main/python/core/models/marker.py b/core/amber/src/main/python/core/models/marker.py index 60e3a86bb9c..47f2b63613c 100644 --- a/core/amber/src/main/python/core/models/marker.py +++ b/core/amber/src/main/python/core/models/marker.py @@ -25,7 +25,7 @@ class EndOfInputChannel(Marker): @dataclass class State(Marker): def __init__( - self, table: Optional[Table] = None, pass_to_all_downstream: bool = False + self, table: Optional[Table] = None, pass_to_all_downstream: bool = False ): if table is None: self.data = {} @@ -36,7 +36,7 @@ def __init__( self.schema = Schema(table.schema) def add( - self, key: str, value: any, value_type: Optional[AttributeType] = None + self, key: str, value: any, value_type: Optional[AttributeType] = None ) -> None: self.data[key] = value if value_type is not None: diff --git a/core/amber/src/main/python/core/runnables/main_loop.py b/core/amber/src/main/python/core/runnables/main_loop.py index 81aac3079c6..fc8b17faad2 100644 --- a/core/amber/src/main/python/core/runnables/main_loop.py +++ b/core/amber/src/main/python/core/runnables/main_loop.py @@ -17,7 +17,11 @@ SenderChange, Tuple, ) -from core.models.internal_marker import StartOfOutputPorts, EndOfInputPort, StartOfInputPort +from core.models.internal_marker import ( + StartOfOutputPorts, + EndOfInputPort, + StartOfInputPort, +) from core.models.internal_queue import DataElement, ControlElement from core.models.marker import State, EndOfInputChannel, StartOfInputChannel from core.runnables.data_processor import DataProcessor @@ -214,8 +218,12 @@ def _process_state(self, state_: State) -> None: self.process_input_state() self._check_and_process_control() - def _process_start_of_input_port(self, start_of_input_port: StartOfInputPort) -> None: - self.context.marker_processing_manager.current_input_marker = start_of_input_port + def _process_start_of_input_port( + self, start_of_input_port: StartOfInputPort + ) -> None: + self.context.marker_processing_manager.current_input_marker = ( + start_of_input_port + ) self.process_input_state() def _process_end_of_input_port(self, end_of_input_port: EndOfInputPort) -> None: diff --git a/core/amber/src/main/python/core/runnables/test_main_loop.py b/core/amber/src/main/python/core/runnables/test_main_loop.py index bb342601df6..1623205c689 100644 --- a/core/amber/src/main/python/core/runnables/test_main_loop.py +++ b/core/amber/src/main/python/core/runnables/test_main_loop.py @@ -135,7 +135,9 @@ def mock_batch_data_elements(self, mock_batch, mock_sender_actor): @pytest.fixture def mock_end_of_upstream(self, mock_tuple, mock_sender_actor): - return DataElement(tag=mock_sender_actor, payload=MarkerFrame(EndOfInputChannel())) + return DataElement( + tag=mock_sender_actor, payload=MarkerFrame(EndOfInputChannel()) + ) @pytest.fixture def input_queue(self): From 10188144f51cb4c72bf6998f8bc776bbcc8cb018 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 23:48:33 -0700 Subject: [PATCH 153/163] update --- .../src/main/python/core/models/marker.py | 30 +++++++++---------- .../src/main/python/core/models/operator.py | 2 +- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/core/amber/src/main/python/core/models/marker.py b/core/amber/src/main/python/core/models/marker.py index 47f2b63613c..9d9b47b4257 100644 --- a/core/amber/src/main/python/core/models/marker.py +++ b/core/amber/src/main/python/core/models/marker.py @@ -27,44 +27,42 @@ class State(Marker): def __init__( self, table: Optional[Table] = None, pass_to_all_downstream: bool = False ): - if table is None: - self.data = {} - self.schema = Schema() - self.add("passToAllDownstream", pass_to_all_downstream) - else: - self.data = table.to_pandas().iloc[0].to_dict() + self.schema = Schema() + self.passToAllDownstream = pass_to_all_downstream + if table is not None: + self.__dict__.update(table.to_pandas().iloc[0].to_dict()) self.schema = Schema(table.schema) def add( self, key: str, value: any, value_type: Optional[AttributeType] = None ) -> None: - self.data[key] = value + self.__dict__[key] = value if value_type is not None: self.schema.add(key, value_type) - else: + elif key != "schema": self.schema.add(key, FROM_PYOBJECT_MAPPING[type(value)]) def get(self, key: str) -> any: - return self.data[key] - - def is_pass_to_all_downstream(self) -> bool: - return self.data["passToAllDownstream"] + return self.__dict__[key] def to_table(self) -> Table: return Table.from_pandas( - df=DataFrame([self.data]), + df=DataFrame([self.__dict__]), schema=self.schema.as_arrow_schema(), ) - def __setitem__(self, key: str, value: any, value_type: AttributeType) -> None: - self.add(key, value, value_type) + def __setattr__(self, key: str, value: any) -> None: + self.add(key, value) + + def __setitem__(self, key: str, value: any) -> None: + self.add(key, value) def __getitem__(self, key: str) -> any: return self.get(key) def __str__(self) -> str: content = ", ".join( - [repr(key) + ": " + repr(value) for key, value in self.data.items()] + [repr(key) + ": " + repr(value) for key, value in self.__dict__.items() if key != "schema"] ) return f"State[{content}]" diff --git a/core/amber/src/main/python/core/models/operator.py b/core/amber/src/main/python/core/models/operator.py index ed04d5014f5..2e420d37f8b 100644 --- a/core/amber/src/main/python/core/models/operator.py +++ b/core/amber/src/main/python/core/models/operator.py @@ -56,7 +56,7 @@ def process_state(self, state: State, port: int) -> Optional[State]: :param port: int, input port index of the current exhausted port. :return: State, producing one State object """ - if state.is_pass_to_all_downstream(): + if state.passToAllDownstream: return state def produce_state_on_start(self, port: int) -> State: From 485a143c8f62d0d35b7a1ecd22cd45fc533a6a04 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Thu, 19 Sep 2024 23:57:11 -0700 Subject: [PATCH 154/163] update --- core/amber/src/main/python/core/models/marker.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/core/amber/src/main/python/core/models/marker.py b/core/amber/src/main/python/core/models/marker.py index 9d9b47b4257..b53e44e9322 100644 --- a/core/amber/src/main/python/core/models/marker.py +++ b/core/amber/src/main/python/core/models/marker.py @@ -62,7 +62,11 @@ def __getitem__(self, key: str) -> any: def __str__(self) -> str: content = ", ".join( - [repr(key) + ": " + repr(value) for key, value in self.__dict__.items() if key != "schema"] + [ + repr(key) + ": " + repr(value) + for key, value in self.__dict__.items() + if key != "schema" + ] ) return f"State[{content}]" From a97b49d7517b022f2c212b27ee1069f62792799c Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Fri, 20 Sep 2024 02:25:50 -0700 Subject: [PATCH 155/163] update --- .../main/python/core/architecture/packaging/input_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/amber/src/main/python/core/architecture/packaging/input_manager.py b/core/amber/src/main/python/core/architecture/packaging/input_manager.py index 0b553af2ea2..519ab448628 100644 --- a/core/amber/src/main/python/core/architecture/packaging/input_manager.py +++ b/core/amber/src/main/python/core/architecture/packaging/input_manager.py @@ -128,12 +128,12 @@ def _process_data(self, table: Table) -> Iterator[Tuple]: def _process_marker(self, marker: Marker) -> Iterator[InternalMarker]: if isinstance(marker, State): yield marker - if isinstance(marker, StartOfInputChannel): # StartOfInputChannel() + if isinstance(marker, StartOfInputChannel): if not self.started: yield StartOfOutputPorts() self.started = True yield StartOfInputPort() - if isinstance(marker, EndOfInputChannel): # EndOfInputChannel() + if isinstance(marker, EndOfInputChannel): channel = self._channels[self._current_channel_id] channel.complete() port_id = channel.port_id From 52196b9b7285494152fb03ef4f107941c9572101 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Fri, 20 Sep 2024 02:47:19 -0700 Subject: [PATCH 156/163] update --- .../python/core/runnables/network_receiver.py | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/core/amber/src/main/python/core/runnables/network_receiver.py b/core/amber/src/main/python/core/runnables/network_receiver.py index 98e90df12e2..110e4904394 100644 --- a/core/amber/src/main/python/core/runnables/network_receiver.py +++ b/core/amber/src/main/python/core/runnables/network_receiver.py @@ -2,6 +2,7 @@ from overrides import overrides from pyarrow.lib import Table from typing import Optional +from pampy import match from core.architecture.handlers.actorcommand.actor_handler_base import ( ActorCommandHandler, @@ -63,18 +64,13 @@ def data_handler(command: bytes, table: Table) -> int: :return: sender credits """ data_header = PythonDataHeader().parse(command) - - match data_header.payload_type: - case "Data": - payload = DataFrame(table) - case "State": - payload = MarkerFrame(State(table)) - case "StartOfInputChannel": - payload = MarkerFrame(StartOfInputChannel()) - case "EndOfInputChannel": - payload = MarkerFrame(EndOfInputChannel()) - case _: - raise NotImplementedError() + payload = match( + data_header.payload_type, + "Data", lambda _: DataFrame(table), + "State", lambda _: MarkerFrame(State(table)), + "StartOfInputChannel", MarkerFrame(StartOfInputChannel()), + "EndOfInputChannel", MarkerFrame(EndOfInputChannel()) + ) shared_queue.put(DataElement(tag=data_header.tag, payload=payload)) return shared_queue.in_mem_size() From 687365257dccce5aa65879f8df23c9786204ff64 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Fri, 20 Sep 2024 02:47:37 -0700 Subject: [PATCH 157/163] update --- .../main/python/core/runnables/network_receiver.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/core/amber/src/main/python/core/runnables/network_receiver.py b/core/amber/src/main/python/core/runnables/network_receiver.py index 110e4904394..22d0911b9f7 100644 --- a/core/amber/src/main/python/core/runnables/network_receiver.py +++ b/core/amber/src/main/python/core/runnables/network_receiver.py @@ -66,10 +66,14 @@ def data_handler(command: bytes, table: Table) -> int: data_header = PythonDataHeader().parse(command) payload = match( data_header.payload_type, - "Data", lambda _: DataFrame(table), - "State", lambda _: MarkerFrame(State(table)), - "StartOfInputChannel", MarkerFrame(StartOfInputChannel()), - "EndOfInputChannel", MarkerFrame(EndOfInputChannel()) + "Data", + lambda _: DataFrame(table), + "State", + lambda _: MarkerFrame(State(table)), + "StartOfInputChannel", + MarkerFrame(StartOfInputChannel()), + "EndOfInputChannel", + MarkerFrame(EndOfInputChannel()), ) shared_queue.put(DataElement(tag=data_header.tag, payload=payload)) From 3fad5652aae4ff5c4e554c0a732383646e39a21d Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Fri, 20 Sep 2024 03:40:31 -0700 Subject: [PATCH 158/163] update --- .../main/python/core/runnables/data_processor.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/core/amber/src/main/python/core/runnables/data_processor.py b/core/amber/src/main/python/core/runnables/data_processor.py index 922061c5381..6d05a8b609a 100644 --- a/core/amber/src/main/python/core/runnables/data_processor.py +++ b/core/amber/src/main/python/core/runnables/data_processor.py @@ -26,6 +26,10 @@ def __init__(self, context: Context): self._context = context def run(self) -> None: + """ + Start the data processing loop. Wait for context switch conditions to be met, + then continuously process markers or tuples until stopped. + """ with self._context.tuple_processing_manager.context_switch_condition: self._context.tuple_processing_manager.context_switch_condition.wait() self._running.set() @@ -42,6 +46,9 @@ def run(self) -> None: self._switch_context() def process_marker(self, marker: Marker) -> None: + """ + Process an input marker by invoking appropriate state or tuple generation based on the marker type. + """ try: executor = self._context.executor_manager.executor port_id = self._context.tuple_processing_manager.get_input_port_id() @@ -67,6 +74,9 @@ def process_marker(self, marker: Marker) -> None: self._switch_context() def process_tuple(self) -> None: + """ + Process an input tuple by invoking the executor's tuple processing method. + """ finished_current = self._context.tuple_processing_manager.finished_current while not finished_current.is_set(): try: @@ -89,6 +99,9 @@ def process_tuple(self) -> None: self._switch_context() def _set_output_tuple(self, output_iterator: Iterator[Optional[TupleLike]]) -> None: + """ + Set the output tuple after processing by the executor. + """ for output in output_iterator: # output could be a None, a TupleLike, or a TableLike. for output_tuple in all_output_to_tuple(output): @@ -103,6 +116,9 @@ def _set_output_tuple(self, output_iterator: Iterator[Optional[TupleLike]]) -> N self._context.tuple_processing_manager.finished_current.set() def _set_output_state(self, output_state: State) -> None: + """ + Set the output state after processing by the executor. + """ self._context.marker_processing_manager.current_output_state = output_state def _switch_context(self) -> None: From b816ad4355ef43d0b7c68104adc9394a2aae2e10 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Fri, 20 Sep 2024 03:45:22 -0700 Subject: [PATCH 159/163] update --- core/amber/src/main/python/core/runnables/data_processor.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/amber/src/main/python/core/runnables/data_processor.py b/core/amber/src/main/python/core/runnables/data_processor.py index 6d05a8b609a..c322f0c7e24 100644 --- a/core/amber/src/main/python/core/runnables/data_processor.py +++ b/core/amber/src/main/python/core/runnables/data_processor.py @@ -47,7 +47,8 @@ def run(self) -> None: def process_marker(self, marker: Marker) -> None: """ - Process an input marker by invoking appropriate state or tuple generation based on the marker type. + Process an input marker by invoking appropriate state + or tuple generation based on the marker type. """ try: executor = self._context.executor_manager.executor From 2f852741d3ffabbdce43f54ae9cf892ec1491f07 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Fri, 20 Sep 2024 04:04:04 -0700 Subject: [PATCH 160/163] update --- .../scala/edu/uci/ics/texera/workflow/common/Marker.scala | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index 034a82fe386..751eb53827f 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -12,9 +12,8 @@ final case class EndOfInputChannel() extends Marker final case class State(tuple: Option[Tuple] = None, passToAllDownstream: Boolean = false) extends Marker { val list: mutable.Map[String, (AttributeType, Any)] = mutable.HashMap() - if (tuple.isEmpty) { - add("passToAllDownstream", passToAllDownstream, AttributeType.BOOLEAN) - } else { + add("passToAllDownstream", passToAllDownstream, AttributeType.BOOLEAN) + if (tuple.isDefined) { tuple.get.getSchema.getAttributes.foreach { attribute => add(attribute.getName, tuple.get.getField(attribute.getName), attribute.getType) } From 901e3ea9c1a83b5bb8725a6ea175d4ad5ab2d6f7 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Fri, 20 Sep 2024 21:35:41 -0700 Subject: [PATCH 161/163] update --- .../ics/amber/engine/architecture/worker/DataProcessor.scala | 2 +- .../amber/engine/architecture/worker/DataProcessorSpec.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala index 1c93b19fd6a..92c134480e2 100644 --- a/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala +++ b/core/amber/src/main/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessor.scala @@ -129,7 +129,7 @@ class DataProcessor( private[this] def processEndOfInputChannel(portId: Int): Unit = { try { val outputState = executor.produceStateOnFinish(portId) - if (outputState != null && outputState.isDefined) { + if (outputState.isDefined) { outputManager.emitMarker(outputState.get) } outputManager.outputIterator.setTupleOutput( diff --git a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala index 843e3e58b49..597622c0bc1 100644 --- a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala +++ b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala @@ -83,7 +83,7 @@ class DataProcessorSpec extends AnyFlatSpec with MockFactory with BeforeAndAfter input: Int ) => executor.produceStateOnFinish(input) ) - .expects(0) + .expects(0).returning(None) ( ( input: Int @@ -141,7 +141,7 @@ class DataProcessorSpec extends AnyFlatSpec with MockFactory with BeforeAndAfter input: Int ) => executor.produceStateOnFinish(input) ) - .expects(0) + .expects(0).returning(None) ( ( input: Int From 216dea88c35822a7d8ca012c8d279812db386001 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Fri, 20 Sep 2024 21:38:57 -0700 Subject: [PATCH 162/163] fix fmt --- .../engine/architecture/worker/DataProcessorSpec.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala index 597622c0bc1..3205a3397be 100644 --- a/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala +++ b/core/amber/src/test/scala/edu/uci/ics/amber/engine/architecture/worker/DataProcessorSpec.scala @@ -83,7 +83,8 @@ class DataProcessorSpec extends AnyFlatSpec with MockFactory with BeforeAndAfter input: Int ) => executor.produceStateOnFinish(input) ) - .expects(0).returning(None) + .expects(0) + .returning(None) ( ( input: Int @@ -141,7 +142,8 @@ class DataProcessorSpec extends AnyFlatSpec with MockFactory with BeforeAndAfter input: Int ) => executor.produceStateOnFinish(input) ) - .expects(0).returning(None) + .expects(0) + .returning(None) ( ( input: Int From 053eca6a9219aa70ee30602ff8d934a19b1c6858 Mon Sep 17 00:00:00 2001 From: linxinyuan Date: Mon, 23 Sep 2024 14:52:28 -0700 Subject: [PATCH 163/163] update --- .../edu/uci/ics/texera/workflow/common/Marker.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala index 751eb53827f..41ef2fa6707 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/workflow/common/Marker.scala @@ -11,7 +11,7 @@ final case class EndOfInputChannel() extends Marker final case class State(tuple: Option[Tuple] = None, passToAllDownstream: Boolean = false) extends Marker { - val list: mutable.Map[String, (AttributeType, Any)] = mutable.HashMap() + val data: mutable.Map[String, (AttributeType, Any)] = mutable.LinkedHashMap() add("passToAllDownstream", passToAllDownstream, AttributeType.BOOLEAN) if (tuple.isDefined) { tuple.get.getSchema.getAttributes.foreach { attribute => @@ -20,9 +20,9 @@ final case class State(tuple: Option[Tuple] = None, passToAllDownstream: Boolean } def add(key: String, value: Any, valueType: AttributeType): Unit = - list.put(key, (valueType, value)) + data.put(key, (valueType, value)) - def get(key: String): Any = list(key)._2 + def get(key: String): Any = data(key)._2 def isPassToAllDownstream: Boolean = get("passToAllDownstream").asInstanceOf[Boolean] @@ -33,15 +33,15 @@ final case class State(tuple: Option[Tuple] = None, passToAllDownstream: Boolean .builder( Schema .builder() - .add(list.map { + .add(data.map { case (name, (attrType, _)) => new Attribute(name, attrType) }) .build() ) - .addSequentially(list.values.map(_._2).toArray) + .addSequentially(data.values.map(_._2).toArray) .build() override def toString: String = - list.map { case (key, (_, value)) => s"$key: $value" }.mkString(", ") + data.map { case (key, (_, value)) => s"$key: $value" }.mkString(", ") }