diff --git a/configure.ac b/configure.ac index 1ba80da935..0fbb22a3f0 100644 --- a/configure.ac +++ b/configure.ac @@ -3,7 +3,7 @@ # # Set Version of SST Elements Library -AC_INIT([SST Elements Library],[-dev],[sst@sandia.gov]) +AC_INIT([SST Elements Library],[14.1],[sst@sandia.gov]) AC_PREREQ([2.59]) AC_COPYRIGHT([Copyright National Technology and Engineering Solutions of Sandia (NTESS), 2004-2024]) diff --git a/src/sst/elements/llyr/Makefile.am b/src/sst/elements/llyr/Makefile.am deleted file mode 100644 index 7fe80e75f7..0000000000 --- a/src/sst/elements/llyr/Makefile.am +++ /dev/null @@ -1,73 +0,0 @@ -# -*- Makefile -*- -# -# -AM_CPPFLAGS += \ - $(MPI_CPPFLAGS) \ - -I$(top_srcdir)/src - -compdir = $(pkglibdir) -comp_LTLIBRARIES = libllyr.la -libllyr_la_SOURCES = \ - llyr.h \ - llyr.cc \ - llyrTypes.h \ - llyrHelpers.h \ - lsQueue.h \ - graph/graph.h \ - graph/edge.h \ - graph/vertex.h \ - parser/parser.h \ - parser/parser.cc \ - mappers/llyrMapper.h \ - mappers/simpleMapper.h \ - mappers/pyMapper.h \ - mappers/csvParser.h \ - pes/processingElement.h \ - pes/dummyPE.h \ - pes/fpPE.h \ - pes/intPE.h \ - pes/logicPE.h \ - pes/loadPE.h \ - pes/storePE.h \ - pes/complexPE.h - -deprecated_libllyr_sources = - -libllyr_la_CFLAGS = \ - $(AM_CLFAGS) \ - $(LLVM_CFLAGS) - -libllyr_la_CXXFLAGS = \ - $(AM_CXXFLAGS) \ - $(LLVM_CXXFLAGS) -fexceptions - -libllyr_la_CPPFLAGS = \ - -I$(top_srcdir)/src \ - $(AM_CPPFLAGS) -fPIC -Wall \ - $(MPI_CPPFLAGS) \ - $(LLVM_CPPFLAGS) - -libllyr_la_LDFLAGS = \ - -module \ - -avoid-version \ - $(AM_LDFLAGS) \ - $(LLVM_LDFLAGS) - -if !SST_ENABLE_PREVIEW_BUILD -libllyr_la_SOURCES += $(deprecated_libsimpleElementExample_sources) -endif - -EXTRA_DIST = \ - tests/llyr_test.py - -deprecated_EXTRA_DIST = - -if !SST_ENABLE_PREVIEW_BUILD -EXTRA_DIST += $(deprecated_EXTRA_DIST) -endif - -install-exec-hook: - $(SST_REGISTER_TOOL) SST_ELEMENT_SOURCE llyr=$(abs_srcdir) - $(SST_REGISTER_TOOL) SST_ELEMENT_TESTS llyr=$(abs_srcdir)/tests - - diff --git a/src/sst/elements/llyr/configure.m4 b/src/sst/elements/llyr/configure.m4 deleted file mode 100644 index 978a2fa95d..0000000000 --- a/src/sst/elements/llyr/configure.m4 +++ /dev/null @@ -1,12 +0,0 @@ -dnl -*- Autoconf -*- - -AC_DEFUN([SST_llyr_CONFIG], [ - sst_check_llyrx="yes" - - SST_CHECK_LLVM_CONFIG([have_llvm=1], - [have_llvm=0], - [AC_MSG_ERROR([LLVM libraries required, but not found])]) - - AS_IF([test "$have_llvm" = 1], [sst_check_llyrx="yes"], [sst_check_llyrx="no"]) - AS_IF([test "$sst_check_llyrx" = "yes"], [$1], [$2]) -]) diff --git a/src/sst/elements/llyr/graph/edge.h b/src/sst/elements/llyr/graph/edge.h deleted file mode 100644 index 4b807e731f..0000000000 --- a/src/sst/elements/llyr/graph/edge.h +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - - -#ifndef _LLYR_G_EDGE_H -#define _LLYR_G_EDGE_H - -#include -#include - -#include - -namespace SST { -namespace Llyr { - -struct EdgeProperties -{ - float weight_; -}; - -class Edge -{ -private: - EdgeProperties* properties_; - uint32_t destinationVertex_; - -protected: - -public: - explicit Edge( uint32_t vertexIn ) - { - properties_ = NULL; - destinationVertex_ = vertexIn; - } - explicit Edge( EdgeProperties* properties, uint32_t vertexIn ) - { - properties_ = properties; - destinationVertex_ = vertexIn; - } - ~Edge(); - - bool setProperties( EdgeProperties* properties ) - { - properties_ = properties; - return true; - } - - EdgeProperties* getProperties( void ) const - { - return properties_; - } - - uint32_t getDestination( void ) const - { - return destinationVertex_; - } - -}; //END Edge - - - - -} // namespace LLyr -} // namespace SST - -#endif - - - diff --git a/src/sst/elements/llyr/graph/graph.h b/src/sst/elements/llyr/graph/graph.h deleted file mode 100644 index 3dd6339629..0000000000 --- a/src/sst/elements/llyr/graph/graph.h +++ /dev/null @@ -1,341 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - - -#ifndef _LLYR_GRAPH_H -#define _LLYR_GRAPH_H - -// enable debugging in the graph lib -// #define GRAPH_DEBUG - -#include -#include - -#include "edge.h" -#include "vertex.h" - -#include -#include -#include -#include -#include - -namespace SST { -namespace Llyr { - - -template -class LlyrGraph -{ - -private: - uint32_t vertices_; - std::map< uint32_t, Vertex< T > >* vertex_map_; - -protected: - -public: - LlyrGraph(); - ~LlyrGraph(); - - uint32_t operator []( const Vertex< T >& value ) const; - - static void copyGraph( const LlyrGraph< T > &graphIn, LlyrGraph< T > &graphOut ); - - void printGraph(); - void printDot( std::string fileName ); - void printDotHardware( std::string fileName ); - - uint32_t outEdges( uint32_t vertexId ); - uint32_t numVertices() const; - - bool addEdge( uint32_t beginVertex, uint32_t endVertex ); - bool addEdge( uint32_t beginVertex, uint32_t endVertex, EdgeProperties* properties ); - - uint32_t addVertex( T type ); - uint32_t addVertex( uint32_t vertexNum, T type ); - - Vertex* getVertex( uint32_t vertexNum ) const; - void setVertex( uint32_t vertexNum, const Vertex &vertex ); - bool testVertex( uint32_t vertexNum ) const; - - std::map< uint32_t, Vertex >* getVertexMap( void ) const; - -}; - -template -LlyrGraph::LlyrGraph() -{ - vertex_map_ = new std::map< uint32_t, Vertex >; - vertices_ = 0; -} - -template -LlyrGraph::~LlyrGraph() -{} - -template -uint32_t LlyrGraph::operator []( const Vertex& value ) const -{ - for( auto it = vertex_map_->begin(); it != vertex_map_ ->end(); ++it ) { - if( it->second == value ) { - return it->first; - } - } - - return 0; -} - -template -void LlyrGraph::copyGraph( const LlyrGraph &graphIn, LlyrGraph &graphOut ) -{ - //container of input -> output vertex mappings - std::map< uint32_t, uint32_t > vertexMappings; - - //add all of the vertices in the input graph to the output graph - auto vertexMap = graphIn.getVertexMap(); - for( auto vertexIterator = vertexMap->begin(); vertexIterator != vertexMap ->end(); ++vertexIterator ) { - auto vertexIn = vertexIterator->second; - uint32_t newVertex = graphOut.addVertex(vertexIn.getValue()); - - auto retVal = vertexMappings.emplace( vertexIterator->first, newVertex ); - if( retVal.second == false ) { - ///TODO - } - -#ifdef GRAPH_DEBUG - std::cout << "Old: " << vertexIterator->first << " New: " << newVertex << std::endl; -#endif - } - - //go back and add all of the outEdges - for( auto vertexIterator = vertexMap->begin(); vertexIterator != vertexMap ->end(); ++vertexIterator ) { - uint32_t sourceVertex = vertexMappings[vertexIterator->first]; -// std::cout << "\n Adjacency list of vertex " << vertexIterator->first << "\n head "; - std::vector< Edge* >* adjacencyList = vertexIterator->second.getAdjacencyList(); - for( auto it = adjacencyList->begin(); it != adjacencyList->end(); ++it ) { -// std::cout << "-> " << (*it)->getDestination(); - - EdgeProperties* tempProperties = (*it)->getProperties(); - uint32_t destinationVertex = vertexMappings[(*it)->getDestination()]; -// std::cout << "SRC: " << vertexIterator->first << " DST: " << (*it)->getDestination() << std::endl; -// std::cout << "SRC: " << sourceVertex << " DST: " << destinationVertex << std::endl; - graphOut.addEdge(sourceVertex, destinationVertex, tempProperties); - - } -// std::cout << std::endl; - } - -} - -template -void LlyrGraph::printGraph(void) -{ - typename std::map< uint32_t, Vertex >::iterator vertexIterator; - for(vertexIterator = vertex_map_->begin(); vertexIterator != vertex_map_->end(); ++vertexIterator) { - std::cout << "\n Adjacency list of vertex " << vertexIterator->first << "\n head "; - - std::vector< Edge* >* adjacencyList = vertexIterator->second.getAdjacencyList(); - for( auto it = adjacencyList->begin(); it != adjacencyList->end(); ++it ) { - std::cout << "-> " << (*it)->getDestination(); - } - std::cout << std::endl; - } -} - -template -void LlyrGraph::printDot( std::string fileName ) -{ - std::ofstream outputFile(fileName.c_str(), std::ios::trunc); //open a file for writing (truncate the current contents) - if ( !outputFile ) //check to be sure file is open - std::cerr << "Error opening file."; - - outputFile << "digraph G {" << "\n"; - - typename std::map< uint32_t, Vertex >::iterator vertexIterator; - for(vertexIterator = vertex_map_->begin(); vertexIterator != vertex_map_->end(); ++vertexIterator) { - outputFile << vertexIterator->first << "[label=\""; - outputFile << vertexIterator->second.getValue(); - outputFile << "\"];\n"; - } - - for(vertexIterator = vertex_map_->begin(); vertexIterator != vertex_map_->end(); ++vertexIterator) { - std::vector< Edge* >* adjacencyList = vertexIterator->second.getAdjacencyList(); - - for( auto it = adjacencyList->begin(); it != adjacencyList->end(); ++it ) { - outputFile << vertexIterator->first; - outputFile << "->"; - outputFile << (*it)->getDestination(); - outputFile << "\n"; - } - } - - outputFile << "}"; - outputFile.close(); -} - -template -void LlyrGraph::printDotHardware( std::string fileName ) -{ - std::ofstream outputFile(fileName.c_str(), std::ios::trunc); //open a file for writing (truncate the current contents) - if ( !outputFile ) //check to be sure file is open - std::cerr << "Error opening file."; - - outputFile << "digraph G {" << "\n"; - - typename std::map< uint32_t, Vertex >::iterator vertexIterator; - for(vertexIterator = vertex_map_->begin(); vertexIterator != vertex_map_->end(); ++vertexIterator) { - outputFile << vertexIterator->first << "[label=\""; - outputFile << getOpString(vertexIterator->second.getValue()->getOpBinding()); - outputFile << " - " << vertexIterator->first; - outputFile << "\"];\n"; - } - - for(vertexIterator = vertex_map_->begin(); vertexIterator != vertex_map_->end(); ++vertexIterator) { - std::vector< Edge* >* adjacencyList = vertexIterator->second.getAdjacencyList(); - - for( auto it = adjacencyList->begin(); it != adjacencyList->end(); ++it ) { - outputFile << vertexIterator->first; - outputFile << "->"; - outputFile << (*it)->getDestination(); - outputFile << "\n"; - } - } - - outputFile << "}"; - outputFile.close(); -} - -template -uint32_t LlyrGraph::outEdges(uint32_t vertexId) -{ - return vertex_map_->at(vertexId).adjacencyList_->size(); -} - -template -uint32_t LlyrGraph::numVertices(void) const -{ - return vertices_; -} - -template -bool LlyrGraph::addEdge( uint32_t beginVertex, uint32_t endVertex ) -{ - Edge* edge = new Edge( endVertex ); - - if( vertex_map_->at(beginVertex).addEdge(edge) ) { - #ifdef GRAPH_DEBUG - std::cout << "add edge: " << beginVertex << " --> " << endVertex << "\n" << std::endl; - #endif - vertex_map_->at(beginVertex).addOutDegree(); - vertex_map_->at(endVertex).addInDegree(); - - return 1; - } - - return 0; -} - -template -bool LlyrGraph::addEdge( uint32_t beginVertex, uint32_t endVertex, EdgeProperties* properties ) -{ - Edge* edge = new Edge( properties, endVertex ); - - if( vertex_map_->at(beginVertex).addEdge(edge) ) { - #ifdef GRAPH_DEBUG - std::cout << "add edge: " << beginVertex << " --> " << endVertex << "\n" << std::endl; - #endif - vertex_map_->at(beginVertex).addOutDegree(); - vertex_map_->at(endVertex).addInDegree(); - return 1; - } - - return 0; -} - -template -uint32_t LlyrGraph::addVertex(T type) -{ - Vertex vertex; - vertex.setValue(type); - uint32_t vertexNum = vertices_; - -#ifdef GRAPH_DEBUG - std::cout << "add vertex: " << vertexNum << "\n" << std::endl; -#endif - - auto retVal = vertex_map_->emplace( vertexNum, vertex ); - if( retVal.second == false ) { - ///TODO - } - - vertices_ = vertices_ + 1; - return vertexNum; -} - -template -uint32_t LlyrGraph::addVertex(uint32_t vertexNum, T type) -{ - Vertex vertex; - vertex.setValue(type); - -#ifdef GRAPH_DEBUG - std::cout << "add vertex: " << vertexNum << "\n" << std::endl; -#endif - - auto retVal = vertex_map_->emplace( vertexNum, vertex ); - if( retVal.second == false ) { - ///TODO - } - - vertices_ = vertices_ + 1; - return vertexNum; -} - -template -Vertex* LlyrGraph::getVertex( uint32_t vertexNum ) const -{ - return &vertex_map_->at(vertexNum); -} - -template -void LlyrGraph::setVertex( uint32_t vertexNum, const Vertex &vertex ) -{ - vertex_map_->at(vertexNum) = vertex; -} - -template -bool LlyrGraph::testVertex( uint32_t vertexNum ) const -{ - if( vertex_map_->find(vertexNum) == vertex_map_->end() ) { - return 0; - } else { - return 1; - } -} - -template -std::map< uint32_t, Vertex >* LlyrGraph::getVertexMap( void ) const -{ - return vertex_map_; -} - -} // namespace LLyr -} // namespace SST - -#endif - - - diff --git a/src/sst/elements/llyr/graph/vertex.h b/src/sst/elements/llyr/graph/vertex.h deleted file mode 100644 index 5462e0d62c..0000000000 --- a/src/sst/elements/llyr/graph/vertex.h +++ /dev/null @@ -1,153 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - - -#ifndef _LLYR_G_VERTEX_H -#define _LLYR_G_VERTEX_H - -#include -#include - -#include -#include - -#include "edge.h" - -namespace SST { -namespace Llyr { - -struct VertexProperties -{ - -}; - -template -class Vertex -{ -private: - T value_; - bool visited_; - uint32_t numInEdges_; - uint32_t numOutEdges_; - - std::vector< Edge* >* adjacencyList_; - -protected: - -public: - Vertex() - { - adjacencyList_ = new std::vector< Edge* >; - visited_ = 0; - numInEdges_ = 0; - numOutEdges_ = 0; - } - - Vertex( T typeIn ) : value_(typeIn) - { - adjacencyList_ = new std::vector< Edge* >; - visited_ = 0; - numInEdges_ = 0; - numOutEdges_ = 0; - } - - Vertex(const Vertex &valueIn) - { - value_ = valueIn.value_; - visited_ = valueIn.visited_; - numInEdges_ = valueIn.numInEdges_; - numOutEdges_ = valueIn.numOutEdges_; - adjacencyList_ = new std::vector< Edge* >(*(valueIn.adjacencyList_)); - } - - bool operator == (const Vertex &valueIn) const - { - return(this->value_ == valueIn.getValue()); - } - - void setValue( T typeIn ) - { - value_ = typeIn; - } - - T getValue( void ) const - { - return value_; - } - - void setVisited( bool visitIn ) - { - visited_ = visitIn; - } - - bool getVisited( void ) const - { - return visited_; - } - - std::vector< Edge* >* getAdjacencyList( void ) const - { - return adjacencyList_; - } - - bool addEdge( Edge* edgeIn ) - { - bool found = 0; - std::vector< Edge* >* adjacencyList = getAdjacencyList(); - for( auto it = adjacencyList->begin(); it != adjacencyList->end(); ++it ) { - if( edgeIn->getDestination() == (*it)->getDestination() ) { - found = 1; - break; - } - } - - if(found == 1) { - return 0; - } else { - adjacencyList_->push_back(edgeIn); - return 1; - } - } - - void addInDegree() - { - ++numInEdges_; - } - - uint32_t getInDegree() const - { - return numInEdges_; - } - - void addOutDegree() - { - ++numOutEdges_; - } - - uint32_t getOutDegree() const - { - return numOutEdges_; - } - -}; //END Vertex - - -} // namespace LLyr -} // namespace SST - -#endif - - - diff --git a/src/sst/elements/llyr/llyr.cc b/src/sst/elements/llyr/llyr.cc deleted file mode 100644 index 802fc36a89..0000000000 --- a/src/sst/elements/llyr/llyr.cc +++ /dev/null @@ -1,594 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#include -#include - -#include -#include -#include -#include -#include - -#include "llyr.h" -#include "llyrTypes.h" -#include "llyrHelpers.h" -#include "parser/parser.h" -#include "mappers/mapperList.h" - -namespace SST { -namespace Llyr { - -LlyrComponent::LlyrComponent(ComponentId_t id, Params& params) : - Component(id) -{ - //initial params - clock_enabled_ = 1; - compute_complete = 0; - const uint32_t verbosity = params.find< uint32_t >("verbose", 0); - - //setup up i/o for messages - char prefix[256]; - sprintf(prefix, "[t=@t][%s]: ", getName().c_str()); - output_ = new SST::Output(prefix, verbosity, 0, Output::STDOUT); - - //tell the simulator not to end without us - registerAsPrimaryComponent(); - primaryComponentDoNotEndSim(); - - //set our Main Clock - const std::string clock_rate = params.find< std::string >("clock", "1.0GHz"); - output_->verbose(CALL_INFO, 1, 0, "Clock is configured for %s\n", clock_rate.c_str()); - clock_tick_handler_ = new Clock::Handler(this, &LlyrComponent::tick); - time_converter_ = registerClock(clock_rate, clock_tick_handler_); - - //set up memory interfaces - mem_interface_ = loadUserSubComponent("iface", ComponentInfo::SHARE_NONE, time_converter_, - new StandardMem::Handler(this, &LlyrComponent::handleEvent)); - - if( !mem_interface_ ) { - std::string interfaceName = params.find("memoryinterface", "memHierarchy.memInterface"); - output_->verbose(CALL_INFO, 1, 0, "Memory interface to be loaded is: %s\n", interfaceName.c_str()); - - Params interfaceParams = params.get_scoped_params("memoryinterfaceparams"); - interfaceParams.insert("port", "cache_link"); - mem_interface_ = loadAnonymousSubComponent(interfaceName, "iface", 0, ComponentInfo::SHARE_PORTS | - ComponentInfo::INSERT_STATS, interfaceParams, time_converter_, new StandardMem::Handler(this, &LlyrComponent::handleEvent)); - - if( !mem_interface_ ) { - output_->fatal(CALL_INFO, -1, "%s, Error loading memory interface\n", getName().c_str()); - } - } - - // set up device address space - starting_addr_ = params.find< uint64_t >("starting_addr", 0); - - // set up MMIO address for device - device_addr_ = params.find< uint64_t >("device_addr", 0); - if( device_addr_ != 0x00 ) { - clock_enabled_ = 0; - mem_interface_->setMemoryMappedAddressRegion(device_addr_, 1); - } - - //need a 'global' LS queue for reordering - ls_queue_ = new LSQueue(); - ls_entries_ = params.find< uint32_t >("ls_entries", 1); - - mem_handlers_ = new LlyrMemHandlers(this, ls_queue_, output_); - - //set up param struct - uint16_t queue_depth = params.find< uint16_t >("queue_depth", 256); - uint16_t arith_latency = params.find< uint16_t >("arith_latency", 1); - uint16_t int_latency = params.find< uint16_t >("int_latency", 1); - uint16_t int_div_latency = params.find< uint16_t >("int_div_latency", 4); - uint16_t fp_latency = params.find< uint16_t >("fp_latency", 4); - uint16_t fp_mul_latency = params.find< uint16_t >("fp_mul_latency", 8); - uint16_t fp_div_latency = params.find< uint16_t >("fp_div_latency", 40); - uint16_t complex_latency = params.find< uint16_t >("complex_latency_", 80); - std::string mapping_tool_ = params.find< std::string >("mapping_tool", ""); - - configData_ = new LlyrConfig { ls_queue_, mem_interface_, starting_addr_, mapping_tool_, verbosity, queue_depth, - arith_latency, int_latency, int_div_latency, fp_latency, fp_mul_latency, fp_div_latency, - complex_latency }; - - memFileName_ = params.find("mem_init", ""); - - //construct hardware graph - std::string const& hwFileName = params.find< std::string >("hardware_graph", "hardware.cfg"); - constructHardwareGraph(hwFileName); - - //construct application graph - std::string const& swFileName = params.find< std::string >("application", "app.in"); - constructSoftwareGraph(swFileName); - - //do the mapping - Params mapperParams; //empty but needed for loadModule API - std::string mapperName = params.find("mapper", "llyr.mapper.simple"); - llyr_mapper_ = loadModule(mapperName, mapperParams); - output_->verbose(CALL_INFO, 1, 0, "Mapping application to hardware with %s\n", mapperName.c_str()); - llyr_mapper_->mapGraph(hardwareGraph_, applicationGraph_, mappedGraph_, configData_); - mappedGraph_.printDotHardware("llyr_mapped.dot"); - - //init stats - zeroEventCycles_ = registerStatistic< uint64_t >("cycles_zero_events"); - eventCycles_ = registerStatistic< uint64_t >("cycles_events"); - - //all done - output_->verbose(CALL_INFO, 1, 0, "Initialization done.\n"); -} - -LlyrComponent::LlyrComponent() : - Component(-1) -{ - // for serialization only -} - -LlyrComponent::~LlyrComponent() -{ - output_->verbose(CALL_INFO, 1, 0, "Llyr destructor fired, closing down.\n"); - -// output_->verbose(CALL_INFO, 10, 0, "Dumping hardware graph...\n"); -// if( output_->getVerboseLevel() >= 10 ) { -// hardwareGraph_.printGraph(); -// hardwareGraph_.printDot("llyr_hdwr.dot"); -// } - - output_->verbose(CALL_INFO, 10, 0, "Dumping application graph...\n"); - if( output_->getVerboseLevel() >= 10 ) { - applicationGraph_.printGraph(); -// applicationGraph_.printDot("llyr_app.dot"); - -// auto app_vertex_map_ = applicationGraph_.getVertexMap(); -// for(auto appIterator = app_vertex_map_->begin(); appIterator != app_vertex_map_->end(); ++appIterator) { -// std::cout << appIterator->first << ": "; -// std::cout << appIterator->second.getValue().optype_ << " - "; -// std::cout << appIterator->second.getValue().argument_ << " - "; -// } - } - -// output_->verbose(CALL_INFO, 10, 0, "Dumping mapping...\n"); -// if( output_->getVerboseLevel() >= 10 ) { -// mappedGraph_.printGraph(); -// mappedGraph_.printDot("llyr_mapped.dot"); -// } -} - -void LlyrComponent::init( uint32_t phase ) -{ - output_->verbose(CALL_INFO, 2, 0, "Initializing...\n"); - - mem_interface_->init( phase ); - if( 0 == phase ) { - std::vector< uint64_t >* initVector; - - //Check to see if there is any memory being initialized - if( memFileName_ != "" ) { - initVector = constructMemory(memFileName_); - } else { - initVector = new std::vector< uint64_t > {16, 64, 32, 0 , 16382, 0, 0}; - } - - std::vector memInit; - constexpr auto buff_size = sizeof(uint64_t); - uint8_t buffer[buff_size] = {}; - for( auto it = initVector->begin(); it != initVector->end(); ++it ) { - std::memcpy(buffer, std::addressof(*it), buff_size); - for( uint32_t i = 0; i < buff_size; ++i ){ - memInit.push_back(buffer[i]); - } - } - - output_->verbose(CALL_INFO, 2, 0, ">> Writing memory contents (%" PRIu64 " bytes at index 0)\n", - (uint64_t) memInit.size()); -// for( std::vector< uint8_t >::iterator it = memInit.begin() ; it != memInit.end(); ++it ) { -// std::cout << uint32_t(*it) << ' '; -// } -// -// std::cout << "\n"; - - StandardMem::Request* initMemory = new StandardMem::Write(starting_addr_, memInit.size(), memInit); - output_->verbose(CALL_INFO, 1, 0, "Sending initialization data to memory...\n"); - mem_interface_->sendUntimedData(initMemory); - output_->verbose(CALL_INFO, 1, 0, "Initialization data sent.\n"); - } -} - -void LlyrComponent::setup() -{ -} - -void LlyrComponent::finish() -{ -} - -bool LlyrComponent::tick(SST::Cycle_t currentCycle) -{ - // TraceFunction trace(CALL_INFO_LONG); - if( clock_enabled_ == 0 ) { - return false; - } - - compute_complete = 0; - //On each tick perform BFS on graph and compute based on operand availability - //NOTE node0 is a dummy node to simplify the algorithm - std::queue< uint32_t > nodeQueue; - - output_->verbose(CALL_INFO, 1, 0, "Device clock tick\n"); - - //Mark all nodes in the PE graph un-visited - std::map< uint32_t, Vertex< ProcessingElement* > >* vertex_map_ = mappedGraph_.getVertexMap(); - typename std::map< uint32_t, Vertex< ProcessingElement* > >::iterator vertexIterator; - for(vertexIterator = vertex_map_->begin(); vertexIterator != vertex_map_->end(); ++vertexIterator) { - vertexIterator->second.setVisited(0); - } - - //Node 0 is a dummy node and is always the entry point - nodeQueue.push(0); - - //BFS and do operations if values available in input queues - while( nodeQueue.empty() == 0 ) { - uint32_t currentNode = nodeQueue.front(); - nodeQueue.pop(); - - std::vector< Edge* >* adjacencyList = vertex_map_->at(currentNode).getAdjacencyList(); - - //set visited for bfs - vertex_map_->at(currentNode).setVisited(1); - - //send n responses from L/S unit to destination - doLoadStoreOps(ls_entries_); - - //Let the PE decide whether or not it can do the compute - vertex_map_->at(currentNode).getValue()->doCompute(); - - //send one item from each output queue to destination - vertex_map_->at(currentNode).getValue()->doSend(); - - compute_complete = compute_complete | vertex_map_->at(currentNode).getValue()->getPendingOp(); - output_->verbose(CALL_INFO, 1, 0, "PE(%" PRIu32 ") pending: %" PRIu32 " status: %" PRIu32 "\n\n", - currentNode, vertex_map_->at(currentNode).getValue()->getPendingOp(), compute_complete ); - - //add the destination vertices from this node to the node queue - for( auto it = adjacencyList->begin(); it != adjacencyList->end(); it++ ) { - uint32_t destinationVertx = (*it)->getDestination(); - if( vertex_map_->at(destinationVertx).getVisited() == 0 ) { - vertex_map_->at(destinationVertx).setVisited(1); - nodeQueue.push(destinationVertx); - } - } - } - - // return false so we keep going - if( compute_complete == 1 ){ - eventCycles_->addData(1); - output_->verbose(CALL_INFO, 40, 0, "Continuing simulation due to live data...\n"); - return false; - } else if( ls_queue_->getNumEntries() > 0 ) { - zeroEventCycles_->addData(1); - output_->verbose(CALL_INFO, 40, 0, "Continuing simulation due to live memory...\n"); - return false; - } else { - output_->verbose(CALL_INFO, 40, 0, "Ending simulation due to flying cows...\n"); - primaryComponentOKToEndSim(); - return true; - } -} - -void LlyrComponent::handleEvent(StandardMem::Request* req) { - req->handle(mem_handlers_); -} - -// Handler for incoming Read requests -void LlyrComponent::LlyrMemHandlers::handle(StandardMem::Read* read) { - out->verbose(CALL_INFO, 8, 0, "Handle Read for Address p-0x%" PRIx64 " -- v-0x%" PRIx64 ".\n", read->pAddr, read->vAddr); - - // Make a response. Must fill in payload. - StandardMem::ReadResp* resp = static_cast(read->makeResponse()); - llyr_->mem_interface_->send(resp); -} - -// Handler for incoming Write requests -void LlyrComponent::LlyrMemHandlers::handle(StandardMem::Write* write) { - out->verbose(CALL_INFO, 8, 0, "Handle Write for Address p-0x%" PRIx64 " -- v-0x%" PRIx64 ".\n", write->pAddr, write->vAddr); - - llyr_->clock_enabled_ = 1; - - /* Send response (ack) if needed */ - if (!(write->posted)) { - llyr_->mem_interface_->send(write->makeResponse()); - } - delete write; -} - -// Handler for incoming Read responses -// - should be a response to a Read we issued -void LlyrComponent::LlyrMemHandlers::handle(StandardMem::ReadResp* resp) { - - // TraceFunction trace(CALL_INFO_LONG); - - std::stringstream dataOut; - for( auto &it : resp->data ) { - dataOut << unsigned(it) << " "; - } - out->verbose(CALL_INFO, 24, 0, "%s\n", dataOut.str().c_str()); - - // Read request needs some special handling - uint64_t addr = resp->pAddr; - uint64_t memValue = 0; - - dataOut.str(std::string()); - LlyrData testArg; - for( auto &it : resp->data ) { - testArg = it; - dataOut << testArg << " "; - } - out->verbose(CALL_INFO, 24, 0, "\n%s\n", dataOut.str().c_str()); - - std::memcpy( std::addressof(memValue), std::addressof(resp->data[0]), sizeof(memValue) ); - - testArg = memValue; -// std::cout << "*" << testArg << std::endl; - - out->verbose(CALL_INFO, 8, 0, "Response to a read, payload=%" PRIu64 ", for addr: %" PRIu64 - " to PE %" PRIu32 "\n", memValue, addr, ls_queue_->lookupEntry( resp->getID() ).second ); - - ls_queue_->setEntryData( resp->getID(), testArg ); - ls_queue_->setEntryReady( resp->getID(), 1 ); - - // Need to clean up the events coming back from the cache - delete resp; - out->verbose(CALL_INFO, 4, 0, "Complete cache response handling.\n"); -} - -// Handler for incoming Write responses -// should be a response to a Write we issued -void LlyrComponent::LlyrMemHandlers::handle(StandardMem::WriteResp* resp) { - - out->verbose(CALL_INFO, 8, 0, "Response to a write for addr: %" PRIu64 " to PE %" PRIu32 "\n", - resp->pAddr, ls_queue_->lookupEntry( resp->getID() ).second ); - ls_queue_->setEntryReady( resp->getID(), 2 ); - - // Need to clean up the events coming back from the cache - delete resp; - out->verbose(CALL_INFO, 4, 0, "Complete cache response handling.\n"); -} - -void LlyrComponent::doLoadStoreOps( uint32_t numOps ) -{ - // TraceFunction trace(CALL_INFO_LONG); - output_->verbose(CALL_INFO, 10, 0, "Doing L/S ops\n"); - for(uint32_t i = 0; i < numOps; ++i ) { - if( ls_queue_->getNumEntries() > 0 ) { - StandardMem::Request::id_t next = ls_queue_->getNextEntry(); - - if( ls_queue_->getEntryReady(next) == 1) { - output_->verbose(CALL_INFO, 10, 0, "--(1)Mem Req ID %" PRIu32 "\n", uint32_t(next)); - LlyrData data = ls_queue_->getEntryData(next); - //pass the value to the appropriate PE - uint32_t srcPe = ls_queue_->lookupEntry( next ).first; - - mappedGraph_.getVertex(srcPe)->getValue()->doReceive(data); - - ls_queue_->removeEntry( next ); - } else if( ls_queue_->getEntryReady(next) == 2 ){ - output_->verbose(CALL_INFO, 10, 0, "--(2)Mem Req ID %" PRIu32 "\n", uint32_t(next)); - ls_queue_->removeEntry( next ); - } - } - } -} - -void LlyrComponent::constructHardwareGraph(std::string fileName) -{ - output_->verbose(CALL_INFO, 1, 0, "Constructing Hardware Graph From: %s\n", fileName.c_str()); - - std::ifstream inputStream(fileName, std::ios::in); - if( inputStream.is_open() ) { - std::string thisLine; - uint64_t position; - while( std::getline( inputStream, thisLine ) ) { - output_->verbose(CALL_INFO, 15, 0, "Parsing: %s\n", thisLine.c_str()); - - // skip first and last lines if this is truly dot - if( thisLine.find( "{" ) != std::string::npos || thisLine.find( "}" ) != std::string::npos) { - continue; - } - - // skip if this description includes dot layout information - if( thisLine.find( "layout" ) != std::string::npos ) { - continue; - } - - //Ignore blank lines - if( std::all_of(thisLine.begin(), thisLine.end(), isspace) == 0 ) { - //First read all nodes - //If all nodes read, must mean we're at edge list - position = thisLine.find_first_of( "[" ); - if( position != std::string::npos ) { - uint32_t vertex = std::stoul( thisLine.substr( 0, position ) ); - - uint64_t posA = thisLine.find_first_of( "=" ) + 1; - uint64_t posB = thisLine.find_last_of( "]" ); - std::string op = thisLine.substr( posA, posB-posA ); - opType operation = getOptype(op); - - output_->verbose(CALL_INFO, 10, 0, "OpString: %s\t\t%" PRIu32 "\n", op.c_str(), operation); - hardwareGraph_.addVertex( vertex, operation ); - } else { - //edge delimiter - std::regex delimiter( "\\--" ); - - std::sregex_token_iterator iterA(thisLine.begin(), thisLine.end(), delimiter, -1); - std::sregex_token_iterator iterB; - std::vector edges( iterA, iterB ); - - edges[0].erase(remove_if(edges[0].begin(), edges[0].end(), isspace), edges[0].end()); - edges[1].erase(remove_if(edges[1].begin(), edges[1].end(), isspace), edges[1].end()); - - output_->verbose(CALL_INFO, 10, 0, "Edges %s--%s\n", edges[0].c_str(), edges[1].c_str()); - - hardwareGraph_.addEdge( std::stoul(edges[0]), std::stoul(edges[1]) ); - } - } - } - - inputStream.close(); - } - else { - output_->fatal(CALL_INFO, -1, "Error: Unable to open %s\n", fileName.c_str() ); - exit(0); - } - -} - -void LlyrComponent::constructSoftwareGraph(std::string fileName) -{ - output_->verbose(CALL_INFO, 1, 0, "Constructing Application Graph From: %s\n", fileName.c_str()); - - std::ifstream inputStream(fileName, std::ios::in); - if( inputStream.is_open() ) { - std::string thisLine; - uint64_t position; - - std::getline( inputStream, thisLine ); - position = thisLine.find( "ModuleID" ); - - output_->verbose(CALL_INFO, 16, 0, "Parsing: %s\n", thisLine.c_str()); - if( position != std::string::npos ) { - constructSoftwareGraphIR(inputStream); - } else { - constructSoftwareGraphApp(inputStream); - } - - inputStream.close(); - } else { - output_->fatal(CALL_INFO, -1, "Error: Unable to open %s\n", fileName.c_str() ); - exit(0); - } -} - -void LlyrComponent::constructSoftwareGraphIR(std::ifstream& inputStream) -{ - std::string thisLine; - - output_->verbose(CALL_INFO, 16, 0, "Sending to LLVM parser\n"); - - inputStream.seekg (0, inputStream.beg); - std::string irString( (std::istreambuf_iterator< char >( inputStream )), - (std::istreambuf_iterator< char >() )); - Parser parser(irString, output_); - parser.generateAppGraph("offload_"); -} - -void LlyrComponent::constructSoftwareGraphApp(std::ifstream& inputStream) -{ - std::string thisLine; - uint64_t position; - - inputStream.seekg (0, inputStream.beg); - while( std::getline( inputStream, thisLine ) ) { - output_->verbose(CALL_INFO, 15, 0, "Parsing: %s\n", thisLine.c_str()); - - //Ignore blank lines - if( std::all_of(thisLine.begin(), thisLine.end(), isspace) == 0 ) { - //First read all nodes, if all nodes read, must mean we're at edge list - position = thisLine.find_first_of( "pe_type" ); - if( position != std::string::npos ) { - AppNode tempNode; - uint32_t vertex = std::stoul( thisLine.substr( 0, position - 2 ) ); - - std::regex delimiter( ",| " ); - std::sregex_token_iterator iterA(thisLine.begin(), thisLine.end(), delimiter, -1); - std::sregex_token_iterator iterB; - std::vector< std::string > edges( iterA, iterB ); - - //clean up the strings a bit - for( auto testIter = edges.begin(); testIter != edges.end(); ++testIter ) { - testIter->erase(remove_if(testIter->begin(), testIter->end(), isspace), testIter->end()); - testIter->erase(remove(testIter->begin(), testIter->end(), '['), testIter->end()); - testIter->erase(remove(testIter->begin(), testIter->end(), ']'), testIter->end()); - output_->verbose(CALL_INFO, 10, 0, "Hiho %s\n", testIter->c_str()); - } - - //pe_type= - 8chars - std::string op = edges[1].substr(8); - opType operation = getOptype(op); - tempNode.optype_ = operation; - output_->verbose(CALL_INFO, 10, 0, "OpString: %s\t\t%" PRIu32 "\n", op.c_str(), tempNode.optype_); - - //Check to see if this PE has any arguments - for( uint32_t i = 2; i < edges.size(); ++i ) { - tempNode.argument_[i - 2] = edges[i]; - - std::cout << i << " - " << edges[i] << " "; - std::cout << i - 2 << " : " < edges( iterA, iterB ); - - edges[0].erase(remove_if(edges[0].begin(), edges[0].end(), isspace), edges[0].end()); - edges[1].erase(remove_if(edges[1].begin(), edges[1].end(), isspace), edges[1].end()); - - output_->verbose(CALL_INFO, 10, 0, "Edges %s--%s\n", edges[0].c_str(), edges[1].c_str()); - - applicationGraph_.addEdge( std::stoul(edges[0]), std::stoul(edges[1]) ); - } - } - } -} - -std::vector< uint64_t >* LlyrComponent::constructMemory(std::string fileName) -{ - std::vector< uint64_t >* tempVector = new std::vector< uint64_t >; - - std::ifstream inputStream(fileName, std::ios::in); - if( inputStream.is_open() ) { - - std::string thisLine; - while( std::getline( inputStream, thisLine ) ) - { - std::string value; - std::stringstream stringIn(thisLine); - while( std::getline(stringIn, value, ',') ) { - tempVector->push_back(std::stoull(value)); - } - } - -// std::cout << "Init Vector(" << tempVector->size() << "): "; -// for( auto it = tempVector->begin(); it != tempVector->end(); ++it ) { -// std::cout << *it; -// std::cout << " "; -// } -// std::cout << std::endl; - - inputStream.close(); - } else { - output_->fatal(CALL_INFO, -1, "Error: Unable to open %s\n", fileName.c_str() ); - exit(0); - } - - return tempVector; -} - -} // namespace llyr -} // namespace SST - - diff --git a/src/sst/elements/llyr/llyr.h b/src/sst/elements/llyr/llyr.h deleted file mode 100644 index a1a5b85fff..0000000000 --- a/src/sst/elements/llyr/llyr.h +++ /dev/null @@ -1,170 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _LLYR_H -#define _LLYR_H - -#include - -#include -#include - -#include -#include -#include - -#include "graph/graph.h" -#include "lsQueue.h" -#include "llyrTypes.h" -#include "pes/peList.h" -#include "mappers/llyrMapper.h" - -using namespace SST::Interfaces; - -namespace SST { -namespace Llyr { - -class LlyrComponent : public SST::Component -{ -public: - - // REGISTER THIS COMPONENT INTO THE ELEMENT LIBRARY - SST_ELI_REGISTER_COMPONENT( - LlyrComponent, - "llyr", - "LlyrDataflow", - SST_ELI_ELEMENT_VERSION(1,0,0), - "Configurable Dataflow Component", - COMPONENT_CATEGORY_UNCATEGORIZED - ) - - SST_ELI_DOCUMENT_PARAMS( - { "verbose", "Level of output verbosity, higher is more output, 0 is no output", 0 }, - { "clock", "Clock frequency", "1GHz" }, - { "device_addr", "Address of device (must be non-zero if not standalone)", "0" }, - { "starting_addr", "Address of device memory", "0" }, - { "clockcount", "Number of clock ticks to execute", "100000" }, - { "application", "Application in affine IR", "app.in" }, - { "hardware_graph", "Hardware connectivity graph", "grid.cfg" }, - { "mapping_tool", "External mapping tool", "" }, - { "mem_init", "Memory initialization file", "" }, - { "ls_entries", "Number of L/S entries to process each tick", "1" }, - { "queue_depth", "Number of buffer elements", "256" }, - { "arith_latency", "Number of clock ticks for ARITH operations", "1" }, - { "int_latency", "Number of clock ticks for INT operations", "1" }, - { "int_div_latency","Number of clock ticks for INT DIV operations", "4" }, - { "fp_latency", "Number of clock ticks for FP operations", "4" }, - { "fp_mul_latency", "Number of clock ticks for FP MUL operations", "8" }, - { "fp_div_latency", "Number of clock ticks for FP DIV operations", "40" }, - { "complex_latency", "Number of clock ticks for COMPLEX operations", "80" } - ) - - ///TODO - SST_ELI_DOCUMENT_STATISTICS( - { "cycles_zero_events", "Number of cycles where there were no events to process, no data tokens", "cycles", 1 }, - { "cycles_events", "Number of cycles where events needed to be processed", "cycles", 1 }, - ) - - SST_ELI_DOCUMENT_PORTS( - { "cache_link", "Link to Memory Controller", { "memHierarchy.memEvent" , "" } } - ) - - SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS( - { "memory", "The memory interface to use (e.g., interface to caches)", "Interfaces::SST::StandardMem" } - ) - - LlyrComponent(SST::ComponentId_t id, SST::Params& params); - ~LlyrComponent(); - - void setup(); - void finish(); - - void init( uint32_t phase ); - -protected: - - -private: - LlyrComponent(); // for serialization only - LlyrComponent( const LlyrComponent& ); // do not implement - void operator=( const LlyrComponent& ); // do not implement - - virtual bool tick( SST::Cycle_t currentCycle ); - - void handleEvent(StandardMem::Request* req); - /* Handlers for StandardMem::Request types */ - class LlyrMemHandlers : public StandardMem::RequestHandler { - public: - friend class LlyrComponent; - friend class LSQueue; - - LlyrMemHandlers(LlyrComponent* llyr, LSQueue* ls_queue, SST::Output* out) : - StandardMem::RequestHandler(out), ls_queue_(ls_queue), llyr_(llyr) {} - virtual ~LlyrMemHandlers() {} - - virtual void handle(StandardMem::Read* read) override; - virtual void handle(StandardMem::Write* write) override; - virtual void handle(StandardMem::ReadResp* resp) override; - virtual void handle(StandardMem::WriteResp* resp) override; - - LSQueue* ls_queue_; - LlyrComponent* llyr_; - }; - - LlyrMemHandlers* mem_handlers_; - StandardMem* mem_interface_; - Addr device_addr_; - Addr starting_addr_; - - std::string mapping_tool_; - - SST::TimeConverter* time_converter_; - Clock::HandlerBase* clock_tick_handler_; - bool handler_registered_; - bool clock_enabled_; - - bool compute_complete; - - SST::Link** links_; - SST::Link* clockLink_; - SST::Output* output_; - - Statistic< uint64_t >* zeroEventCycles_; - Statistic< uint64_t >* eventCycles_; - - LlyrConfig* configData_; - LlyrGraph< opType > hardwareGraph_; - LlyrGraph< AppNode > applicationGraph_; - LlyrGraph< ProcessingElement* > mappedGraph_; - - LlyrMapper* llyr_mapper_; - - void constructHardwareGraph( std::string fileName ); - void constructSoftwareGraph( std::string fileName ); - void constructSoftwareGraphIR( std::ifstream& inputStream ); - void constructSoftwareGraphApp( std::ifstream& inputStream ); - std::vector< uint64_t >* constructMemory( std::string fileName ); - std::string memFileName_; - - uint32_t ls_entries_; - LSQueue* ls_queue_; - void doLoadStoreOps( uint32_t numOps ); - -}; - -} // namespace LLyr -} // namespace SST - -#endif // _LLYR_H diff --git a/src/sst/elements/llyr/llyrHelpers.h b/src/sst/elements/llyr/llyrHelpers.h deleted file mode 100644 index 3fd0b76374..0000000000 --- a/src/sst/elements/llyr/llyrHelpers.h +++ /dev/null @@ -1,493 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - - -#ifndef _LLYR_HELPERS -#define _LLYR_HELPERS - -#include -#include -#include -#include - -#include "llyrTypes.h" -#include "mappers/csvParser.h" - -namespace SST { -namespace Llyr { - -inline opType const getOptype(std::string &opString) -{ - opType operation; - - // transform to make opString case insensitive - std::transform(opString.begin(), opString.end(), opString.begin(), - [](unsigned char c){ return std::toupper(c); } - ); - - if( opString == "ROUTE" ) - operation = ROUTE; - else if( opString == "ANY" ) - operation = ANY; - else if( opString == "ANY_MEM" ) - operation = ANY_MEM; - else if( opString == "LD" ) - operation = LD; - else if( opString == "LDADDR" ) - operation = LDADDR; - else if( opString == "STREAM_LD" ) - operation = STREAM_LD; - else if( opString == "ST" ) - operation = ST; - else if( opString == "STADDR" ) - operation = STADDR; - else if( opString == "STREAM_ST" ) - operation = STREAM_ST; - else if( opString == "ALLOCA" ) - operation = ALLOCA; - else if( opString == "ANY_LOGIC" ) - operation = ANY_LOGIC; - else if( opString == "AND" ) - operation = AND; - else if( opString == "OR" ) - operation = OR; - else if( opString == "XOR" ) - operation = XOR; - else if( opString == "NOT" ) - operation = NOT; - else if( opString == "SLL" ) - operation = SLL; - else if( opString == "SLR" ) - operation = SLR; - else if( opString == "ROL" ) - operation = ROL; - else if( opString == "ROR" ) - operation = ROR; - else if( opString == "EQ" ) - operation = EQ; - else if( opString == "EQ_IMM" ) - operation = EQ_IMM; - else if( opString == "NE" ) - operation = NE; - else if( opString == "UGT" ) - operation = UGT; - else if( opString == "UGT_IMM" ) - operation = UGT_IMM; - else if( opString == "UGE" ) - operation = UGE; - else if( opString == "UGE_IMM" ) - operation = UGE_IMM; - else if( opString == "SGT" ) - operation = SGT; - else if( opString == "SGT_IMM" ) - operation = SGT_IMM; - else if( opString == "SGE" ) - operation = SGE; - else if( opString == "ULT" ) - operation = ULT; - else if( opString == "ULE" ) - operation = ULE; - else if( opString == "ULE_IMM" ) - operation = ULE_IMM; - else if( opString == "SLT" ) - operation = SLT; - else if( opString == "SLT_IMM" ) - operation = SLT_IMM; - else if( opString == "SLE" ) - operation = SLE; - else if( opString == "AND_IMM" ) - operation = AND_IMM; - else if( opString == "OR_IMM" ) - operation = OR_IMM; - else if( opString == "ANY_INT" ) - operation = ANY_INT; - else if( opString == "ADD" ) - operation = ADD; - else if( opString == "SUB" ) - operation = SUB; - else if( opString == "MUL" ) - operation = MUL; - else if( opString == "DIV" ) - operation = DIV; - else if( opString == "REM" ) - operation = REM; - else if( opString == "ADDCONST" ) - operation = ADDCONST; - else if( opString == "SUBCONST" ) - operation = SUBCONST; - else if( opString == "MULCONST" ) - operation = MULCONST; - else if( opString == "DIVCONST" ) - operation = DIVCONST; - else if( opString == "REMCONST" ) - operation = REMCONST; - else if( opString == "INC" ) - operation = INC; - else if( opString == "INC_RST" ) - operation = INC_RST; - else if( opString == "ACC" ) - operation = ACC; - else if( opString == "ANY_FP" ) - operation = ANY_FP; - else if( opString == "FADD" ) - operation = FADD; - else if( opString == "FSUB" ) - operation = FSUB; - else if( opString == "FMUL" ) - operation = FMUL; - else if( opString == "FDIV" ) - operation = FDIV; - else if( opString == "FMatMul" ) - operation = FMatMul; - else if( opString == "ANY_CP" ) - operation = ANY_CP; - else if( opString == "TSIN" ) - operation = TSIN; - else if( opString == "TCOS" ) - operation = TCOS; - else if( opString == "TTAN" ) - operation = TTAN; - else if( opString == "DUMMY" ) - operation = DUMMY; - else if( opString == "BUFFER" ) - operation = BUFFER; - else if( opString == "REPEATER" ) - operation = REPEATER; - else if( opString == "ROS" ) - operation = ROS; - else if( opString == "RNE" ) - operation = RNE; - else if( opString == "ROZ" ) - operation = ROZ; - else if( opString == "ROO" ) - operation = ROO; - else if( opString == "ONEONAND" ) - operation = ONEONAND; - else if( opString == "GATED_ONE" ) - operation = GATED_ONE; - else if( opString == "MERGE" ) - operation = MERGE; - else if( opString == "FILTER" ) - operation = FILTER; - else if( opString == "SEL" ) - operation = SEL; - else if( opString == "RET" ) - operation = RET; - else - operation = OTHER; - - return operation; -} - -inline std::string const getOpString(const opType &op) -{ - std::string operation; - - if( op == ROUTE ) - operation = "ROUTE"; - else if( op == ANY ) - operation = "ANY"; - else if( op == ANY_MEM ) - operation = "ANY_MEM"; - else if( op == LD ) - operation = "LD"; - else if( op == LDADDR ) - operation = "LDADDR"; - else if( op == STREAM_LD ) - operation = "STREAM_LD"; - else if( op == ST ) - operation = "ST"; - else if( op == STADDR ) - operation = "STADDR"; - else if( op == STREAM_ST ) - operation = "STREAM_ST"; - else if( op == ALLOCA ) - operation = "ALLOCA"; - else if( op == ANY_LOGIC ) - operation = "ANY_LOGIC"; - else if( op == AND ) - operation = "AND"; - else if( op == OR ) - operation = "OR"; - else if( op == XOR ) - operation = "XOR"; - else if( op == NOT ) - operation = "NOT"; - else if( op == SLL ) - operation = "SLL"; - else if( op == SLR ) - operation = "SLR"; - else if( op == ROL ) - operation = "ROL"; - else if( op == ROR ) - operation = "ROR"; - else if( op == EQ ) - operation = "EQ"; - else if( op == EQ_IMM ) - operation = "EQ_IMM"; - else if( op == NE ) - operation = "NE"; - else if( op == UGT ) - operation = "UGT"; - else if( op == UGT_IMM ) - operation = "UGT_IMM"; - else if( op == UGE ) - operation = "UGE"; - else if( op == UGE_IMM ) - operation = "UGE_IMM"; - else if( op == SGT ) - operation = "SGT"; - else if( op == SGT_IMM ) - operation = "SGT_IMM"; - else if( op == SGE ) - operation = "SGE"; - else if( op == ULT ) - operation = "ULT"; - else if( op == ULE ) - operation = "ULE"; - else if( op == ULE_IMM ) - operation = "ULE_IMM"; - else if( op == SLT ) - operation = "SLT"; - else if( op == SLT_IMM ) - operation = "SLT_IMM"; - else if( op == SLE ) - operation = "SLE"; - else if( op == AND_IMM ) - operation = "AND_IMM"; - else if( op == OR_IMM ) - operation = "OR_IMM"; - else if( op == ANY_INT ) - operation = "ANY_INT"; - else if( op == ADD ) - operation = "ADD"; - else if( op == SUB ) - operation = "SUB"; - else if( op == MUL ) - operation = "MUL"; - else if( op == DIV ) - operation = "DIV"; - else if( op == REM ) - operation = "REM"; - else if( op == ADDCONST ) - operation = "ADDCONST"; - else if( op == SUBCONST ) - operation = "SUBCONST"; - else if( op == MULCONST ) - operation = "MULCONST"; - else if( op == DIVCONST ) - operation = "DIVCONST"; - else if( op == REMCONST ) - operation = "REMCONST"; - else if( op == INC ) - operation = "INC"; - else if( op == INC_RST ) - operation = "INC_RST"; - else if( op == ACC ) - operation = "ACC"; - else if( op == ANY_FP ) - operation = "ANY_FP"; - else if( op == FADD ) - operation = "FADD"; - else if( op == FSUB ) - operation = "FSUB"; - else if( op == FMUL ) - operation = "FMUL"; - else if( op == FDIV ) - operation = "FDIV"; - else if( op == FMatMul ) - operation = "FMatMul"; - else if( op == ANY_CP ) - operation = "ANY_CP"; - else if( op == TSIN ) - operation = "TSIN"; - else if( op == TCOS ) - operation = "TCOS"; - else if( op == TTAN ) - operation = "TTAN"; - else if( op == DUMMY ) - operation = "DUMMY"; - else if( op == BUFFER ) - operation = "BUFFER"; - else if( op == ROS ) - operation = "ROS"; - else if( op == RNE ) - operation = "RNE"; - else if( op == ROZ ) - operation = "ROZ"; - else if( op == ROO ) - operation = "ROO"; - else if( op == ONEONAND ) - operation = "ONEONAND"; - else if( op == GATED_ONE ) - operation = "GATED_ONE"; - else if( op == MERGE ) - operation = "MERGE"; - else if( op == FILTER ) - operation = "FILTER"; - else if( op == REPEATER ) - operation = "REPEATER"; - else if( op == SEL ) - operation = "SEL"; - else if( op == RET ) - operation = "RET"; - else - operation = "OTHER"; - - return operation; -} - -inline void printHardwareNode(HardwareNode* hardwareNode, std::ostream& os) -{ - os << "**** PE " << hardwareNode->pe_id_ << " ****" << std::endl; - os << "JOB: " << hardwareNode->job_id_ << std::endl; - os << "consts: " << std::endl; - slist_t::const_iterator cit = hardwareNode->const_list_->begin(); - for( ; cit!=hardwareNode->const_list_->end(); cit++ ) { - os << *cit << std::endl; - } - - os << "inputs: " << std::endl; - std::list< PairPE >::const_iterator pit = hardwareNode->input_list_->begin(); - for( ; pit!=hardwareNode->input_list_->end(); pit++ ) { - pair_t p = *pit; - os << p.first << ',' << p.second << std::endl; - } - - os << "OP: " << hardwareNode->op_ << std::endl; - os << "outputs: " << std::endl; - pit = hardwareNode->output_list_->begin(); - for( ; pit!=hardwareNode->output_list_->end(); pit++ ) { - pair_t p = *pit; - os << p.first << ',' << p.second << std::endl; - } - - os << "routes: " << std::endl; - std::list< TriplePE >::const_iterator tlit = hardwareNode->route_list_->begin(); - for( ; tlit!=hardwareNode->route_list_->end(); tlit++ ) { - triple_t t = *tlit; - os << std::get<0>(t) << "," << std::get<1>(t) << "," << std::get<2>(t) << std::endl; - } - - os << std::endl; -} - -inline std::list< std::string >* process_single_level(std::string str, char delim) -{ - CSVParser csvData(str, delim); - const auto& data = csvData.get_data(); - std::list< std::string >* result = new std::list< std::string >; - - for( const auto& row : data ) { - for( const auto& cell : row ) { - if( std::string(cell) != "" ) { - result->push_back(std::string(cell)); - } - } - } - - return result; -} - -inline PairPE process_pair(std::string str, char delim) -{ - CSVParser csvData(str, delim); - const auto& data = csvData.get_data(); - assert(data[0].size() == 2); - std::string field1 = std::string(data[0][0]); - std::string field2 = std::string(data[0][1]); - - return PairPE(field1, std::stoi(field2)); -} - -inline TriplePE process_triple(std::string str, char delim) -{ - CSVParser csvData(str, delim); - const auto& data = csvData.get_data(); - assert(data[0].size() == 3); - std::string field1 = std::string(data[0][0]); - std::string field2 = std::string(data[0][1]); - std::string field3 = std::string(data[0][2]); - - return TriplePE(field1, std::stoi(field2), std::stoi(field3)); -} - -inline PairEdge* process_edge_row(const std::vector< std::string >& row) -{ - const std::string from_pe = std::string(row[1]); - const std::string to_pe = std::string(row[2]); - std::cout << "EDGE: " << from_pe << " " << to_pe << std::endl; - - return new PairEdge(from_pe, to_pe); -} - -inline HardwareNode* process_node_row(const std::vector< std::string >& row) -{ - HardwareNode* hardwareNode = new HardwareNode; - - // TODO: figure out lists of lists - hardwareNode->pe_id_ = std::string(row[1]); - hardwareNode->job_id_ = std::string(row[2]); - hardwareNode->op_ = std::string(row[5]); - hardwareNode->const_list_ = process_single_level(std::string(row[3]), ';'); - - // parse inputs - std::list< PairPE >* input_pairs = new std::list< PairPE >; - std::list< std::string >* input_str = process_single_level(std::string(row[4]), ';'); - std::list< std::string >::const_iterator sit = input_str->begin(); - for( ; sit!=input_str->end(); sit++ ) { - std::string s = *sit; - if( s.length() > 1 ) { - pair_t p = process_pair(s, ','); - input_pairs->push_back(p); - } - } - hardwareNode->input_list_ = input_pairs; - - // parse outputs - std::list< PairPE >* output_pairs = new std::list< PairPE >; - std::list< std::string >* output_str = process_single_level(std::string(row[6]), ';'); - sit = output_str->begin(); - for( ; sit!=output_str->end(); sit++ ) { - std::string s = *sit; - if( s.length() > 1 ) { - pair_t p = process_pair(s, ','); - output_pairs->push_back(p); - } - } - hardwareNode->output_list_ = output_pairs; - - // parse routes -- Note that the way the csv parsing works, there may not be an 8th entry - std::list< TriplePE >* routes_triples = new std::list< TriplePE >; - if( row.size() >= 8 ) { - std::list< std::string >* routes_str = process_single_level(std::string(row[7]), ';'); - sit = routes_str->begin(); - for( ; sit!=routes_str->end(); sit++ ) { - std::string s = *sit; - if( s.length() > 1 ) { - TriplePE t = process_triple(s, ','); - routes_triples->push_back(t); - } - } - } - hardwareNode->route_list_ = routes_triples; - - return hardwareNode; -} - -}//Llyr -}//SST - -#endif // _LLYR_HELPERS diff --git a/src/sst/elements/llyr/llyrTypes.h b/src/sst/elements/llyr/llyrTypes.h deleted file mode 100644 index 0ebfea6727..0000000000 --- a/src/sst/elements/llyr/llyrTypes.h +++ /dev/null @@ -1,170 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - - -#ifndef _LLYR_TYPES -#define _LLYR_TYPES - -#include - -#include -#include -#include -#include - -#define Bit_Length 64 -typedef std::bitset< Bit_Length > LlyrData; -typedef std::string Arg; -typedef uint64_t Addr; - -typedef std::pair< const std::string, uint32_t > pair_t; -typedef std::tuple< const std::string, uint32_t, uint32_t > triple_t; -typedef std::list< std::string > slist_t; -typedef std::list< pair_t > plist_t; -typedef std::list< triple_t > tlist_t; - -using namespace SST::Interfaces; - -namespace SST { -namespace Llyr { - -// forward declaration of LSQueue -class LSQueue; - -// data type to pass between Llyr, mapper, and PEs -typedef struct alignas(uint64_t) { - LSQueue* lsqueue_; - StandardMem* mem_interface_; - Addr starting_addr_; - std::string mapping_tool_; - - uint32_t verbosity_; - uint16_t queueDepth_; - uint16_t arith_latency_; - uint16_t int_latency_; - uint16_t int_div_latency_; - uint16_t fp_latency_; - uint16_t fp_mul_latency_; - uint16_t fp_div_latency_; - uint16_t complex_latency_; -} LlyrConfig; - -// data type to store PE data -typedef std::pair< std::string, std::string > PairEdge; -typedef std::pair< std::string, uint32_t > PairPE; -typedef std::tuple< std::string, uint32_t, uint32_t > TriplePE; - -// data type for adv pes -typedef std::map< uint32_t, Arg > QueueArgMap; - -typedef struct alignas(uint64_t) { - std::string pe_id_; - std::string job_id_; - std::string op_; - std::list< std::string >* const_list_; - std::list< PairPE >* input_list_; - std::list< PairPE >* output_list_; - std::list< TriplePE >* route_list_; -} HardwareNode; - -typedef enum { - ROUTE = 0x00, - ANY, - ANY_MEM, - LD, - LDADDR, - STREAM_LD, - ST, - STADDR, - STREAM_ST, - ALLOCA, - ANY_LOGIC = 0x20, - AND, - OR, - XOR, - NOT, - SLL, - SLR, - ROL, - ROR, - AND_IMM, - OR_IMM, - ANY_TEST = 0x40, - EQ, - EQ_IMM, - NE, - UGT, - UGT_IMM, - UGE, - UGE_IMM, - SGT, - SGT_IMM, - SGE, - ULT, - ULE, - ULE_IMM, - SLT, - SLT_IMM, - SLE, - ANY_INT = 0x80, - ADD, - SUB, - MUL, - DIV, - REM, - ADDCONST, - SUBCONST, - MULCONST, - DIVCONST, - REMCONST, - INC, - INC_RST, - ACC, - ANY_FP = 0xC0, - FADD, - FSUB, - FMUL, - FDIV, - FMatMul, - ANY_CP = 0xF0, - TSIN, - TCOS, - TTAN, - DUMMY = 0xFF, - BUFFER, - REPEATER, - ROS, - RNE, - ROZ, - ROO, - ONEONAND, - GATED_ONE, - MERGE, - FILTER, - SEL, - RET, - OTHER -} opType; - -// application graph node -typedef struct alignas(uint64_t) { - opType optype_; - Arg argument_[2]; -} AppNode; - -}//Llyr -}//SST - -#endif // _LLYR_TYPES diff --git a/src/sst/elements/llyr/lsQueue.h b/src/sst/elements/llyr/lsQueue.h deleted file mode 100644 index ece3c61f2f..0000000000 --- a/src/sst/elements/llyr/lsQueue.h +++ /dev/null @@ -1,165 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - - -#ifndef _LLYR_LSQ -#define _LLYR_LSQ - -#include -#include - -#include -#include -#include -#include -#include - -#include "llyrTypes.h" - -using namespace SST::Interfaces; - -namespace SST { -namespace Llyr { - - -class LSEntry -{ -public: - LSEntry(const StandardMem::Request::id_t reqId, uint32_t srcProc, uint32_t dstProc) : - req_id_(reqId), src_proc_(srcProc), dst_proc_(dstProc), ready_(0) {} - ~LSEntry() {} - - uint32_t getSourcePe() const { return src_proc_; } - uint32_t getTargetPe() const { return dst_proc_; } - StandardMem::Request::id_t getReqId() const { return req_id_; } - - void setData( LlyrData data ) { data_ = data; } - LlyrData getData() const{ return data_; } - - void setReady( uint32_t ready ) { ready_ = ready; } - uint32_t getReady() const{ return ready_; } - -protected: - StandardMem::Request::id_t req_id_; - - uint32_t src_proc_; - uint32_t dst_proc_; - - uint32_t ready_; - LlyrData data_; - -private: - -}; // LSEntry - -class LSQueue -{ -public: - LSQueue() - { - //setup up i/o for messages - char prefix[256]; - sprintf(prefix, "[t=@t][LSQueue]: "); - output_ = new SST::Output(prefix, 0, 0, Output::STDOUT); - } - - LSQueue(const LSQueue ©) - { - output_ = copy.output_; - memory_queue_ = copy.memory_queue_; - pending_ = copy.pending_; - } - - ~LSQueue() {} - - uint32_t getNumEntries() const { return memory_queue_.size(); } - StandardMem::Request::id_t getNextEntry() const { return memory_queue_.front(); } - - void addEntry( LSEntry* entry ) - { - memory_queue_.push( entry->getReqId() ); - pending_.emplace( entry->getReqId(), entry ); - } - - std::pair< uint32_t, uint32_t > lookupEntry( StandardMem::Request::id_t id ) - { - auto entry = pending_.find( id ); - if( entry == pending_.end() ) { - output_->verbose(CALL_INFO, 0, 0, "Error: response from memory could not be found.\n"); - exit(-1); - } - - return std::make_pair( entry->second->getSourcePe(), entry->second->getTargetPe() ); - } - - void removeEntry( StandardMem::Request::id_t id ) - { - memory_queue_.pop(); - auto entry = pending_.find( id ); - if( entry != pending_.end() ) { - pending_.erase(entry); - } - } - - LlyrData getEntryData( StandardMem::Request::id_t id ) const - { - auto entry = pending_.find( id ); - if( entry != pending_.end() ) { - return entry->second->getData(); - } - - return 0; - } - - void setEntryData( StandardMem::Request::id_t id, LlyrData data ) - { - auto entry = pending_.find( id ); - if( entry != pending_.end() ) { - entry->second->setData(data); - } - } - - uint32_t getEntryReady( StandardMem::Request::id_t id ) const - { - auto entry = pending_.find( id ); - if( entry != pending_.end() ) { - return entry->second->getReady(); - } - - return 0; - } - - void setEntryReady( StandardMem::Request::id_t id, uint32_t ready ) - { - auto entry = pending_.find( id ); - if( entry != pending_.end() ) { - entry->second->setReady(ready); - } - } - -protected: - -private: - SST::Output* output_; - - std::queue< StandardMem::Request::id_t > memory_queue_; - std::map< StandardMem::Request::id_t, LSEntry* > pending_; - -}; // LSQueue - -} -} - -#endif // _LLYR_LSQ diff --git a/src/sst/elements/llyr/mappers/csvParser.h b/src/sst/elements/llyr/mappers/csvParser.h deleted file mode 100644 index b8d21b1d69..0000000000 --- a/src/sst/elements/llyr/mappers/csvParser.h +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _CSV_PARSER_H -#define _CSV_PARSER_H - -#include -#include -#include -#include - -#include "../llyrTypes.h" - -namespace SST { -namespace Llyr { - -class CSVParser { -public: - CSVParser(char delimiter = ',') : delimiter_(delimiter) - { - } - - CSVParser(const std::string& stringIn, char delimiter = ',') : delimiter_(delimiter) - { - read_csv_file(stringIn); - } - - const std::vector< std::vector< std::string > >& get_data() const { return data_; } - const std::vector< std::string >& operator[](uint32_t index) const { return data_[index]; } - -private: - char delimiter_; - std::vector< std::vector< std::string > > data_; - - void read_csv_file(const std::string& stringIn) - { - // is this a csv file or a row - auto const extension = stringIn.find_last_of('.'); - if( extension != std::string::npos ) { - std::string fileExtension = stringIn.substr(extension + 1); - - if( fileExtension.compare("csv") == 0 ) { - std::ifstream file(stringIn); - - if( !file.is_open() ) { - std::cerr << "File not found: " << stringIn << std::endl; - return; - } - - std::string line; - while (std::getline(file, line)) { - data_.push_back(parse_line(line)); - } - } - } else { - data_.push_back(parse_line(stringIn)); - } - } - - std::vector parse_line(const std::string& line) - { - std::vector< std::string > row; - std::stringstream ss(line); - std::string cell; - - while( std::getline(ss, cell, delimiter_ ) ) { - row.push_back(cell); - } - - return row; - } -}; - -}// namespace Llyr -}// namespace SST - -#endif // _CSV_PARSER_H diff --git a/src/sst/elements/llyr/mappers/llyrMapper.h b/src/sst/elements/llyr/mappers/llyrMapper.h deleted file mode 100644 index ca3494a081..0000000000 --- a/src/sst/elements/llyr/mappers/llyrMapper.h +++ /dev/null @@ -1,228 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _LLYR_MAPPER_H -#define _LLYR_MAPPER_H - -#include -#include - -#include "../graph/graph.h" -#include "../lsQueue.h" -#include "../llyrTypes.h" -#include "../llyrHelpers.h" -#include "pes/peList.h" - -namespace SST { -namespace Llyr { - -class LlyrMapper : public SST::Module -{ - -public: - SST_ELI_REGISTER_MODULE_API(SST::Llyr::LlyrMapper); - - LlyrMapper() : Module() {} - virtual ~LlyrMapper() {} - - virtual void mapGraph(LlyrGraph< opType > hardwareGraph, LlyrGraph< AppNode > appGraph, - LlyrGraph< ProcessingElement* > &graphOut, - LlyrConfig* llyr_config) = 0; - - void addNode(opType op_binding, uint32_t nodeNum, LlyrGraph< ProcessingElement* > &graphOut, - LlyrConfig* llyr_config); - void addNode(opType op_binding, QueueArgMap* arguments, uint32_t nodeNum, LlyrGraph< ProcessingElement* > &graphOut, - LlyrConfig* llyr_config); -}; - -void LlyrMapper::addNode(opType op_binding, uint32_t nodeNum, LlyrGraph< ProcessingElement* > &graphOut, - LlyrConfig* llyr_config) -{ - ProcessingElement* tempPE; - - //setup up i/o for messages - char prefix[256]; - sprintf(prefix, "[t=@t][llyrMapper]: "); - SST::Output* output_ = new SST::Output(prefix, llyr_config->verbosity_, 0, Output::STDOUT); - - if( op_binding == LD ) { - tempPE = new LoadProcessingElement( LD, nodeNum, llyr_config ); - } else if( op_binding == ALLOCA ) { - tempPE = new LoadProcessingElement( ALLOCA, nodeNum, llyr_config ); - } else if( op_binding == ST ) { - tempPE = new StoreProcessingElement( ST, nodeNum, llyr_config ); - } else if( op_binding == AND ) { - tempPE = new LogicProcessingElement( AND, nodeNum, llyr_config ); - } else if( op_binding == OR ) { - tempPE = new LogicProcessingElement( OR, nodeNum, llyr_config ); - } else if( op_binding == XOR ) { - tempPE = new LogicProcessingElement( XOR, nodeNum, llyr_config ); - } else if( op_binding == NOT ) { - tempPE = new LogicProcessingElement( NOT, nodeNum, llyr_config ); - } else if( op_binding == SLL ) { - tempPE = new LogicProcessingElement( SLL, nodeNum, llyr_config ); - } else if( op_binding == SLR ) { - tempPE = new LogicProcessingElement( SLR, nodeNum, llyr_config ); - } else if( op_binding == ROL ) { - tempPE = new LogicProcessingElement( ROL, nodeNum, llyr_config ); - } else if( op_binding == ROR ) { - tempPE = new LogicProcessingElement( ROR, nodeNum, llyr_config ); - } else if( op_binding == EQ ) { - tempPE = new LogicProcessingElement( EQ, nodeNum, llyr_config ); - } else if( op_binding == NE ) { - tempPE = new LogicProcessingElement( NE, nodeNum, llyr_config ); - } else if( op_binding == UGT ) { - tempPE = new LogicProcessingElement( UGT, nodeNum, llyr_config ); - } else if( op_binding == UGE ) { - tempPE = new LogicProcessingElement( UGE, nodeNum, llyr_config ); - } else if( op_binding == SGT ) { - tempPE = new LogicProcessingElement( SGT, nodeNum, llyr_config ); - } else if( op_binding == SGE ) { - tempPE = new LogicProcessingElement( SGE, nodeNum, llyr_config ); - } else if( op_binding == ULT ) { - tempPE = new LogicProcessingElement( ULT, nodeNum, llyr_config ); - } else if( op_binding == ULE ) { - tempPE = new LogicProcessingElement( ULE, nodeNum, llyr_config ); - } else if( op_binding == SLT ) { - tempPE = new LogicProcessingElement( SLT, nodeNum, llyr_config ); - } else if( op_binding == SLE ) { - tempPE = new LogicProcessingElement( SLE, nodeNum, llyr_config ); - } else if( op_binding == ADD ) { - tempPE = new IntProcessingElement( ADD, nodeNum, llyr_config ); - } else if( op_binding == SUB ) { - tempPE = new IntProcessingElement( SUB, nodeNum, llyr_config ); - } else if( op_binding == MUL ) { - tempPE = new IntProcessingElement( MUL, nodeNum, llyr_config ); - } else if( op_binding == DIV ) { - tempPE = new IntProcessingElement( DIV, nodeNum, llyr_config ); - } else if( op_binding == REM ) { - tempPE = new IntProcessingElement( REM, nodeNum, llyr_config ); - } else if( op_binding == FADD ) { - tempPE = new FPProcessingElement( FADD, nodeNum, llyr_config ); - } else if( op_binding == FSUB ) { - tempPE = new FPProcessingElement( FSUB, nodeNum, llyr_config ); - } else if( op_binding == FMUL ) { - tempPE = new FPProcessingElement( FMUL, nodeNum, llyr_config ); - } else if( op_binding == FDIV ) { - tempPE = new FPProcessingElement( FDIV, nodeNum, llyr_config ); - } else if( op_binding == FMatMul ) { - tempPE = new FPProcessingElement( FMatMul, nodeNum, llyr_config ); - } else if( op_binding == TSIN ) { - tempPE = new ComplexProcessingElement( TSIN, nodeNum, llyr_config ); - } else if( op_binding == TCOS ) { - tempPE = new ComplexProcessingElement( TCOS, nodeNum, llyr_config ); - } else if( op_binding == TTAN ) { - tempPE = new ComplexProcessingElement( TTAN, nodeNum, llyr_config ); - } else if( op_binding == DUMMY ) { - tempPE = new DummyProcessingElement( DUMMY, nodeNum, llyr_config ); - } else if( op_binding == BUFFER ) { - tempPE = new ControlProcessingElement( BUFFER, nodeNum, llyr_config ); - } else if( op_binding == REPEATER ) { - tempPE = new ControlProcessingElement( REPEATER, nodeNum, llyr_config ); - } else if( op_binding == ROZ ) { - tempPE = new ControlProcessingElement( ROZ, nodeNum, llyr_config ); - } else if( op_binding == ROO ) { - tempPE = new ControlProcessingElement( ROO, nodeNum, llyr_config ); - } else if( op_binding == ONEONAND ) { - tempPE = new ControlProcessingElement( ONEONAND, nodeNum, llyr_config ); - } else if( op_binding == GATED_ONE ) { - tempPE = new ControlProcessingElement( GATED_ONE, nodeNum, llyr_config ); - } else if( op_binding == MERGE ) { - tempPE = new ControlProcessingElement( MERGE, nodeNum, llyr_config ); - } else if( op_binding == SEL ) { - tempPE = new ControlProcessingElement( SEL, nodeNum, llyr_config ); - } else if( op_binding == ROUTE ) { - tempPE = new ControlProcessingElement( ROUTE, nodeNum, llyr_config ); - } else if( op_binding == RET ) { - tempPE = new ControlProcessingElement( RET, nodeNum, llyr_config ); - } else { - output_->fatal(CALL_INFO, -1, "Error: Unable to find specified operation\n"); - exit(0); - } - - graphOut.addVertex( nodeNum, tempPE ); - -}// addNode - -void LlyrMapper::addNode(opType op_binding, QueueArgMap* arguments, uint32_t nodeNum, LlyrGraph< ProcessingElement* > &graphOut, - LlyrConfig* llyr_config) -{ - ProcessingElement* tempPE; - - //setup up i/o for messages - char prefix[256]; - sprintf(prefix, "[t=@t][llyrMapper]: "); - SST::Output* output_ = new SST::Output(prefix, llyr_config->verbosity_, 0, Output::STDOUT); - - if( op_binding == LDADDR ) { - tempPE = new AdvLoadProcessingElement( LDADDR, nodeNum, llyr_config, arguments ); - } else if( op_binding == STREAM_LD ) { - tempPE = new AdvLoadProcessingElement( STREAM_LD, nodeNum, llyr_config, arguments ); - } else if( op_binding == STADDR ) { - tempPE = new AdvStoreProcessingElement( STADDR, nodeNum, llyr_config, arguments ); - } else if( op_binding == STREAM_ST ) { - tempPE = new AdvStoreProcessingElement( STREAM_ST, nodeNum, llyr_config, arguments ); - } else if( op_binding == AND_IMM ) { - tempPE = new LogicConstProcessingElement( AND_IMM, nodeNum, llyr_config, arguments ); - } else if( op_binding == OR_IMM ) { - tempPE = new LogicConstProcessingElement( OR_IMM, nodeNum, llyr_config, arguments ); - } else if( op_binding == EQ_IMM ) { - tempPE = new LogicConstProcessingElement( EQ_IMM, nodeNum, llyr_config, arguments ); - } else if( op_binding == UGT_IMM ) { - tempPE = new LogicConstProcessingElement( UGT_IMM, nodeNum, llyr_config, arguments ); - } else if( op_binding == UGE_IMM ) { - tempPE = new LogicConstProcessingElement( UGE_IMM, nodeNum, llyr_config, arguments ); - } else if( op_binding == ULE_IMM ) { - tempPE = new LogicConstProcessingElement( ULE_IMM, nodeNum, llyr_config, arguments ); - } else if( op_binding == SGT_IMM ) { - tempPE = new LogicConstProcessingElement( SGT_IMM, nodeNum, llyr_config, arguments ); - } else if( op_binding == SLT_IMM ) { - tempPE = new LogicConstProcessingElement( SLT_IMM, nodeNum, llyr_config, arguments ); - } else if( op_binding == ADDCONST ) { - tempPE = new IntConstProcessingElement( ADDCONST, nodeNum, llyr_config, arguments ); - } else if( op_binding == SUBCONST ) { - tempPE = new IntConstProcessingElement( SUBCONST, nodeNum, llyr_config, arguments ); - } else if( op_binding == MULCONST ) { - tempPE = new IntConstProcessingElement( MULCONST, nodeNum, llyr_config, arguments ); - } else if( op_binding == DIVCONST ) { - tempPE = new IntConstProcessingElement( DIVCONST, nodeNum, llyr_config, arguments ); - } else if( op_binding == REMCONST ) { - tempPE = new IntConstProcessingElement( REMCONST, nodeNum, llyr_config, arguments ); - } else if( op_binding == INC ) { - tempPE = new AdvIntProcessingElement( INC, nodeNum, llyr_config, arguments ); - } else if( op_binding == INC_RST ) { - tempPE = new AdvIntProcessingElement( INC_RST, nodeNum, llyr_config, arguments ); - } else if( op_binding == ACC ) { - tempPE = new AdvIntProcessingElement( ACC, nodeNum, llyr_config, arguments ); - } else if( op_binding == ROS ) { - tempPE = new ControlConstProcessingElement( ROS, nodeNum, llyr_config, arguments ); - } else if( op_binding == RNE ) { - tempPE = new ControlConstProcessingElement( RNE, nodeNum, llyr_config, arguments ); - } else if( op_binding == FILTER ) { - tempPE = new ControlConstProcessingElement( FILTER, nodeNum, llyr_config, arguments ); - } else { - output_->fatal(CALL_INFO, -1, "Error: Unable to find specified operation\n"); - exit(0); - } - - graphOut.addVertex( nodeNum, tempPE ); - -}// addNode - - -}// namespace Llyr -}// namespace SST - -#endif // _LLYR_MAPPER_H diff --git a/src/sst/elements/llyr/mappers/mapperList.h b/src/sst/elements/llyr/mappers/mapperList.h deleted file mode 100644 index 06ee49cff1..0000000000 --- a/src/sst/elements/llyr/mappers/mapperList.h +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _MAPPER_LIST_H -#define _MAPPER_LIST_H - -#include "simpleMapper.h" -#include "pyMapper.h" - -#endif //MAPPER_LIST_H diff --git a/src/sst/elements/llyr/mappers/pyMapper.h b/src/sst/elements/llyr/mappers/pyMapper.h deleted file mode 100644 index 69c2a3dfc1..0000000000 --- a/src/sst/elements/llyr/mappers/pyMapper.h +++ /dev/null @@ -1,587 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _PY_MAPPER_H -#define _PY_MAPPER_H - -#include -#include -#include -#include -#include -#include -#include - -#include "mappers/llyrMapper.h" -#include "csvParser.h" - -namespace SST { -namespace Llyr { - -typedef struct alignas(uint64_t) { - std::vector< uint32_t >* adjacency_list_; - std::vector< std::string >* state_list_; - std::vector< std::pair< uint32_t, uint32_t >* >* forward_list_; -} NodeAttributes; - -typedef struct alignas(uint64_t) { - std::string* routing_arg_; - ProcessingElement* src_node_; - ProcessingElement* dst_node_; -} RoutingFixUp; - -class PyMapper : public LlyrMapper -{ - -public: - explicit PyMapper(Params& params) : - LlyrMapper() {} - ~PyMapper() { } - - SST_ELI_REGISTER_MODULE( - PyMapper, - "llyr", - "mapper.py", - SST_ELI_ELEMENT_VERSION(1,0,0), - "App to HW", - SST::Llyr::LlyrMapper - ) - - void mapGraph(LlyrGraph< opType > hardwareGraph, LlyrGraph< AppNode > appGraph, - LlyrGraph< ProcessingElement* > &graphOut, - LlyrConfig* llyr_config); - -private: - - void runMappingTool( std::string ); - void getAdjacencyList( std::string, std::vector< uint32_t >* ); - void getStateList( std::string, std::vector< std::string >* ); - void printDot( std::string, LlyrGraph< ProcessingElement* >* ) const; - -}; - -void PyMapper::runMappingTool(std::string mapping_tool) -{ - FILE* fp; - - Py_Initialize(); - - fp = fopen(mapping_tool.c_str(), "r"); - PyRun_SimpleFile(fp, mapping_tool.c_str()); - - Py_Finalize(); -} - -void PyMapper::mapGraph(LlyrGraph< opType > hardwareGraph, LlyrGraph< AppNode > appGraph, - LlyrGraph< ProcessingElement* > &graphOut, - LlyrConfig* llyr_config) -{ - // TraceFunction trace(CALL_INFO_LONG); - //setup up i/o for messages - char prefix[256]; - sprintf(prefix, "[t=@t][pyMapper]: "); - SST::Output* output_ = new SST::Output(prefix, llyr_config->verbosity_, 0, Output::STDOUT); - - if( llyr_config->mapping_tool_ == "" ) { - output_->fatal( CALL_INFO, -1, "This mapper requires a pre-processor, defined as mapper_tool in the configuration.\n" ); - exit(0); - } - - char tmp[256]; - if( getcwd(tmp, sizeof(tmp) ) != NULL) { - std::cout << "Current working directory: " << tmp << std::endl; - } else { - output_->fatal( CALL_INFO, -1, "Failed to get pwd.\n" ); - exit(0); - } - - // namespace filesystem_var = std::filesystem; - // std::cout << "Current working directory: " << filesystem_var.current_path() << std::endl; - runMappingTool(llyr_config->mapping_tool_); - -// std::string fileName = "deepmind/strassen2x2_clay.csv"; -// std::string fileName = "deepmind/strassen_6x7_rect_gap1.csv"; - std::string fileName = "ipdps24/generic_solution.csv"; - output_->verbose(CALL_INFO, 1, 0, "Mapping Application Using: %s\n", fileName.c_str()); - - std::list< HardwareNode* > node_list; - std::list< PairEdge* > edge_list; - - CSVParser csvData(fileName, '|'); - - const auto& data = csvData.get_data(); - - for( const auto& row : data ) { - if( row[0] == "node" ) { - node_list.push_back( process_node_row(row) ); - } else { - edge_list.push_back( process_edge_row(row) ); - } - } - - // add the nodes from the mapper to the hardware graph - for( auto it = node_list.begin(); it != node_list.end(); ++it ) { - printHardwareNode(*it, std::cout); - std::cout << std::endl; - - // target node in fabric - uint32_t hardwareVertex = std::stoul( (*it)->pe_id_ ); - output_->verbose(CALL_INFO, 15, 0, "Target Graph Node: %u\n", hardwareVertex); - - // operation - if( (*it)->op_ != "" ) { - opType op = getOptype( (*it)->op_ ); - output_->verbose(CALL_INFO, 15, 0, "Operation: %u -- %s\n", op, getOpString(op).c_str()); - - // get constants - QueueArgMap* arguments = new QueueArgMap; - for( auto val_it = (*it)->const_list_->begin(); val_it != (*it)->const_list_->end(); ++val_it ) { - if( val_it->size() > 0 ) { - uint64_t posA = val_it->find_first_of( ":" ); - - std::cout << " Consts(" << val_it->size() << " -- " << (*it)->const_list_->size() << ")\n"; - std::cout << *val_it << " -- " << val_it->substr(0, posA) << std::flush; - std::cout << " -- " << val_it->substr(posA + 1) << std::flush; - std::cout << std::endl; - - Arg some_arg = val_it->substr(0, posA); - uint32_t queue_id = std::stoll(val_it->substr(posA + 1)); - arguments->emplace( queue_id, some_arg ); - } - } - - // encode node in hardware graph - // some nodes need queue initialization -- right now, that's mostly const/imm pes TODO fix naming - if( op == ADDCONST || op == SUBCONST || op == MULCONST || op == DIVCONST || op == REMCONST ) { - addNode( op, arguments, hardwareVertex, graphOut, llyr_config ); - } else if( op == INC || op == INC_RST || op == ACC ) { - addNode( op, arguments, hardwareVertex, graphOut, llyr_config ); - } else if( op == LDADDR || op == STREAM_LD || op == STADDR || op == STREAM_ST ) { - addNode( op, arguments, hardwareVertex, graphOut, llyr_config ); - } else if( op == OR_IMM || op == AND_IMM ) { - addNode( op, arguments, hardwareVertex, graphOut, llyr_config ); - } else if( op == EQ_IMM || op == UGT_IMM || op == UGE_IMM || op == ULE_IMM || op == SGT_IMM || op == SLT_IMM ) { - addNode( op, arguments, hardwareVertex, graphOut, llyr_config ); - } else if( op == ROS || op == RNE || op == FILTER ) { - addNode( op, arguments, hardwareVertex, graphOut, llyr_config ); - } else { - addNode( op, hardwareVertex, graphOut, llyr_config ); - } - } - } - - // add the edges, ignore queue bindings - for( auto it = edge_list.begin(); it != edge_list.end(); ++it ) { - uint32_t source_pe = std::stoul( (*it)->first ); - uint32_t dest_pe = std::stoul( (*it)->second ); - - graphOut.addEdge( source_pe, dest_pe ); - } - - // should probably just check nodes in adj list - // add the edges, instantiating the input/output queues - std::cout << "------------------------------------------------------\n"; - std::cout << "\tBinding Nodes\n"; - std::cout << "------------------------------------------------------\n" << std::endl; - ProcessingElement* srcNode; - ProcessingElement* dstNode; - std::list< RoutingFixUp* > routing_fix_list; - std::map< uint32_t, Vertex< ProcessingElement* > >* vertex_map = graphOut.getVertexMap(); - for( auto vertexIterator = vertex_map->begin(); vertexIterator != vertex_map->end(); ++vertexIterator ) { - std::cout << "num input queues(" << vertexIterator->second.getValue()->getProcessorId() << ")"; - std::cout << ": " << vertexIterator->second.getValue()->getNumInputQueues() << std::endl; - vertexIterator->second.getValue()->inputQueueInit(); - std::cout << std::flush; - std::cout << "num input queues(" << vertexIterator->second.getValue()->getProcessorId() << ")"; - std::cout << ": " << vertexIterator->second.getValue()->getNumInputQueues() << std::endl; - std::cout << std::endl; - - // testing creating all input queues on init - uint32_t current_node = vertexIterator->first; - auto src_node_iter = std::find_if(node_list.begin(), node_list.end(), [¤t_node] - (const HardwareNode* some_node){ return std::stoul(some_node->pe_id_) == current_node;} ); - - std::cout << "Node " << vertexIterator->second.getValue()->getProcessorId(); - std::cout << ": const " << (*src_node_iter)->const_list_->size(); - std::cout << ": input " << (*src_node_iter)->input_list_->size(); - std::cout << std::endl; - - vertexIterator->second.getValue()->createInputQueues((*src_node_iter)->const_list_->size() + (*src_node_iter)->input_list_->size()); - } - - for( auto vertexIterator = vertex_map->begin(); vertexIterator != vertex_map->end(); ++vertexIterator ) { - uint32_t current_node = vertexIterator->first; - - // bind the queues -- there is now explicit binding based on arguments - // find the current node in the hardware list - auto src_node_iter = std::find_if(node_list.begin(), node_list.end(), [¤t_node] - (const HardwareNode* some_node){ return std::stoul(some_node->pe_id_) == current_node;} ); - - std::cout << "** Binding Node " << (*src_node_iter)->pe_id_ << " **" << std::endl; - std::cout << "\t** Output Queue " << (*src_node_iter)->pe_id_ << " **" << std::endl; - - uint32_t output_queue_num = 0; - for( auto output_iter = (*src_node_iter)->output_list_->begin(); output_iter != (*src_node_iter)->output_list_->end(); ++output_iter ) { - - std::string arg = output_iter->first; - uint32_t dst_node = output_iter->second; - std::cout << "\targ: " << arg << " --> " << dst_node << std::endl; - - auto dst_node_iter = std::find_if(node_list.begin(), node_list.end(), [&dst_node] - (const HardwareNode* some_node){ return std::stoul(some_node->pe_id_) == dst_node;} ); - - // check input list at destination for match - bool is_also_route = 0; - uint32_t new_input_id = 0; - for( auto input_iter = (*dst_node_iter)->input_list_->begin(); input_iter != (*dst_node_iter)->input_list_->end(); ++input_iter ) { - - std::cout << "\t\t" << input_iter->first << " -- " << input_iter->second << " (offset " << new_input_id << ")" << std::endl; - std::string* routing_arg = new std::string(""); - if( arg == input_iter->first ) { - - // check to see if this variable is also being routed - for( auto route_iter = (*dst_node_iter)->route_list_->begin(); route_iter != (*dst_node_iter)->route_list_->end(); ++route_iter ) { - - std::cout << "\t" << std::get<0>(*route_iter) << " ++ " << std::get<1>(*route_iter); - std::cout << " ++ " << std::get<2>(*route_iter) << std::endl; - - if( input_iter->first == std::get<0>(*route_iter) ) { - is_also_route = 1; - *routing_arg = std::get<0>(*route_iter); - break; - } - } - - srcNode = vertex_map->at(current_node).getValue(); - dstNode = vertex_map->at(dst_node).getValue(); - - const uint32_t input_queue_offset = (*dst_node_iter)->const_list_->size() + new_input_id; - - if( is_also_route == 1 ) { - srcNode->bindOutputQueue(dstNode, output_queue_num); - dstNode->bindInputQueue(srcNode, input_queue_offset, 1, routing_arg); - std::cout << "AAA: " << current_node << " -> " << dst_node << " :: " << input_queue_offset << " -> " << output_queue_num; - std::cout << " <" << *routing_arg << ">"; - std::cout << std::endl; - } else { - srcNode->bindOutputQueue(dstNode, output_queue_num); - dstNode->bindInputQueue(srcNode, input_queue_offset, 0); - std::cout << "BBB: " << current_node << " -> " << dst_node << " :: " << input_queue_offset << " -> " << output_queue_num; - std::cout << std::endl; - } - - break; - } - - new_input_id = new_input_id + 1; - }// dst for - - // check route list at destination for match - new_input_id = 0; - for( auto route_iter = (*dst_node_iter)->route_list_->begin(); route_iter != (*dst_node_iter)->route_list_->end(); ++route_iter ) { - if( arg == std::get<0>(*route_iter) ) { - if( is_also_route == 1 ) { - continue; - } - - std::cout << "\t\t" << arg << " ++ " << std::get<0>(*route_iter) << " " << (*dst_node_iter)->pe_id_ << " (offset " << new_input_id << ")" << std::endl; - - std::string* routing_arg = new std::string(arg); - - srcNode = vertex_map->at(current_node).getValue(); - dstNode = vertex_map->at(dst_node).getValue(); - - const uint32_t input_queue_offset = (*dst_node_iter)->const_list_->size() + (*dst_node_iter)->input_list_->size() + new_input_id; - - srcNode->bindOutputQueue(dstNode, output_queue_num); - dstNode->bindInputQueue(srcNode, input_queue_offset, -1, routing_arg); - std::cout << "CCC: " << current_node << " -> " << dst_node << " :: " << input_queue_offset << " -> " << output_queue_num; - std::cout << " <" << *routing_arg << ">"; - std::cout << std::endl; - } - - // FIXME -- This is hack-y ^-^ - // If this item is also on the input list, don't increment - bool dual_list = 0; - for( auto input_iter = (*dst_node_iter)->input_list_->begin(); input_iter != (*dst_node_iter)->input_list_->end(); ++input_iter ) { - std::cout << "\t\tx " << input_iter->first << " -- " << std::get<0>(*route_iter) << " (offset " << new_input_id << ")" << std::endl; - - if( std::get<0>(*route_iter) == input_iter->first ) { - dual_list = 1; - break; - } - } - - if( dual_list == 0 ) { - new_input_id = new_input_id + 1; - } - } - - output_queue_num = output_queue_num + 1; - }// binding output queues - - std::cout << "\t** Route Queue " << (*src_node_iter)->pe_id_ << " **" << std::endl; - for( auto route_iter = (*src_node_iter)->route_list_->begin(); route_iter != (*src_node_iter)->route_list_->end(); ++route_iter ) { - std::string arg = std::get<0>(*route_iter); - uint32_t dst_node = std::get<2>(*route_iter); - - std::cout << "\t src: " << (*src_node_iter)->pe_id_; - std::cout << " dst: " << dst_node; - std::cout << std::endl; - - auto dst_node_iter = std::find_if(node_list.begin(), node_list.end(), [&dst_node] - (const HardwareNode* some_node){ return std::stoul(some_node->pe_id_) == dst_node;} ); - - // check input list at destination for match - bool is_input = 0; - std::cout << "\t\tChecking input list at dst..." << std::endl; - - uint32_t new_input_id = 0; - for( auto input_iter = (*dst_node_iter)->input_list_->begin(); input_iter != (*dst_node_iter)->input_list_->end(); ++input_iter ) { - std::string* routing_arg = new std::string(""); - if( arg == input_iter->first ) { - - // check to see if this variable is also being routed - bool is_also_route = 0; - for( auto route_iter = (*dst_node_iter)->route_list_->begin(); route_iter != (*dst_node_iter)->route_list_->end(); ++route_iter ) { - std::cout << "\t" << std::get<0>(*route_iter) << " ++ " << std::get<1>(*route_iter); - std::cout << " ++ " << std::get<2>(*route_iter) << std::endl; - - if( input_iter->first == std::get<0>(*route_iter) ) { - is_also_route = 1; - *routing_arg = std::get<0>(*route_iter); - break; - } - } - - srcNode = vertex_map->at(current_node).getValue(); - dstNode = vertex_map->at(dst_node).getValue(); - - const uint32_t input_queue_offset = (*dst_node_iter)->const_list_->size() + new_input_id; - - if( is_also_route == 1 ) { - srcNode->bindOutputQueue(dstNode, output_queue_num); - dstNode->bindInputQueue(srcNode, input_queue_offset, 1, routing_arg); - std::cout << "DDD: " << current_node << " -> " << dst_node << " :: " << input_queue_offset << " -> " << output_queue_num; - std::cout << " <" << *routing_arg << ">"; - std::cout << std::endl; - } else { - srcNode->bindOutputQueue(dstNode, output_queue_num); - dstNode->bindInputQueue(srcNode, input_queue_offset, 0); - std::cout << "EEE: " << current_node << " -> " << dst_node << " :: " << input_queue_offset << " -> " << output_queue_num; - std::cout << std::endl; - } - - output_queue_num = output_queue_num + 1; - is_input = 1; - break; - } - - new_input_id = new_input_id + 1; - } - - //if not an input, check routing queue (node could route-to-route) - uint32_t input_route_count = 0; - std::cout << "\t\tChecking route list at dst..." << std::endl; - if( is_input == 0 ) { - new_input_id = 0; - for( auto dst_route_iter = (*dst_node_iter)->route_list_->begin(); dst_route_iter != (*dst_node_iter)->route_list_->end(); ++dst_route_iter ) { - - std::string* routing_arg = new std::string(arg); - std::cout << "\t\t\t" << std::get<0>(*dst_route_iter) << std::endl; - if( arg == std::get<0>(*dst_route_iter) ) { - std::cout << "FOUND " << arg << ", input " << is_input << ", offset " << new_input_id; - std::cout << ", count " << input_route_count << std::endl; - - srcNode = vertex_map->at(current_node).getValue(); - dstNode = vertex_map->at(dst_node).getValue(); - - //find which input queue -- const list size + something - const uint32_t input_queue_offset = (*dst_node_iter)->const_list_->size() + (*dst_node_iter)->input_list_->size() + new_input_id; - -std::cout << "xxxxx: " << (*dst_node_iter)->const_list_->size() << " " << (*dst_node_iter)->input_list_->size() << " <> " << input_queue_offset << std::endl; - - srcNode->bindOutputQueue(dstNode, output_queue_num); - dstNode->bindInputQueue(srcNode, input_queue_offset, -1, routing_arg); - std::cout << "FFF: " << current_node << " -> " << dst_node << " :: " << input_queue_offset << " -> " << output_queue_num; - std::cout << " <" << *routing_arg << ">"; - std::cout << std::endl; - - output_queue_num = output_queue_num + 1; - is_input = 1; - break; - } - - // don't want to inc the place in the queue binding for routes if we already treated it as an input - auto in_rt_pair = std::find_if((*dst_node_iter)->input_list_->begin(), (*dst_node_iter)->input_list_->end(), - [&dst_route_iter](const PairPE some_pe){ return some_pe.first == std::get<0>(*dst_route_iter);} ); - if( in_rt_pair != (*dst_node_iter)->input_list_->end() ) { - std::cout << "pp-" << (*in_rt_pair).first << std::endl; - input_route_count = input_route_count + 1; - } else { - new_input_id = new_input_id + 1; - } - } - } - }// binding route queue - } - - // add routing arguments for output queues - for( auto vertexIterator = vertex_map->begin(); vertexIterator != vertex_map->end(); ++vertexIterator ) { - - uint32_t current_node = vertexIterator->first; - auto node_iter = std::find_if(node_list.begin(), node_list.end(), [¤t_node] - (const HardwareNode* some_node){ return std::stoul(some_node->pe_id_) == current_node;} ); - - std::cout << "\nFixing route for node " << current_node << std::endl; - for( auto route_iter = (*node_iter)->route_list_->begin(); route_iter != (*node_iter)->route_list_->end(); ++route_iter ) { - std::string* routing_arg = new std::string(std::get<0>(*route_iter)); - uint32_t dst_node = std::get<2>(*route_iter); - - srcNode = vertex_map->at(current_node).getValue(); - dstNode = vertex_map->at(dst_node).getValue(); - - uint32_t queue_id_x = srcNode->getQueueOutputProcBinding(dstNode); - std::cout << "Queue Id = " << queue_id_x << std::endl; - - std::cout << "Updatating Queue Id = " << queue_id_x; - std::cout << " With " << *routing_arg << std::endl; - srcNode->setOutputQueueRoute(queue_id_x, routing_arg); - } - } - - // insert dummy as node 0 to make BFS easier - addNode( DUMMY, 0, graphOut, llyr_config ); - - std::cout << "Doing fixup for Node-0..." << std::endl; - // fixup for node-0; would be nice to get rid of this one day - typename std::map< uint32_t, Vertex< ProcessingElement* > >::iterator vertexIterator; - for(vertexIterator = vertex_map->begin(); vertexIterator != vertex_map->end(); ++vertexIterator) { - uint32_t current_node = vertexIterator->first; - auto node_iter = std::find_if(node_list.begin(), node_list.end(), [¤t_node] - (const HardwareNode* some_node){ return std::stoul(some_node->pe_id_) == current_node;} ); - - if( vertexIterator->first > 0 ) { - std::cout << vertexIterator->first << std::flush; - std::cout << ": " << (*node_iter)->input_list_->size(); -// std::cout << " --> " << vertexIterator->second.getValue->getNumInputQueues(); - std::cout << std::endl; - - if( (*node_iter)->input_list_->size() == 0 ) { - std::cout << "Adding edge between 0 and " << vertexIterator->first << std::endl; - graphOut.addEdge( 0, vertexIterator->first ); - } - } - } -} - -void PyMapper::getAdjacencyList( std::string opList, std::vector< uint32_t >* vecIn ) -{ - std::cout << "--- Getting Adjacency List --- " << std::endl; - - // clean the input string - opList.erase(remove(opList.begin(), opList.end(), '('), opList.end()); - opList.erase(remove(opList.begin(), opList.end(), ')'), opList.end()); - opList.erase(remove(opList.begin(), opList.end(), '['), opList.end()); - opList.erase(remove(opList.begin(), opList.end(), ']'), opList.end()); - opList.erase(opList.find("operation"), std::string("operation").length()); - - // split into tokens - std::regex delimiter( "," ); - std::sregex_token_iterator iterA(opList.begin(), opList.end(), delimiter, -1); - std::sregex_token_iterator iterB; - std::vector tokenizedSring( iterA, iterB ); - - std::cout << "***Len of vector " << tokenizedSring.size() << std::endl; - // - if( tokenizedSring.size() > 1 ) { - for( uint32_t i = 0; i < tokenizedSring.size(); i++ ) { - std::cout << "+++ " << tokenizedSring[i] << std::endl; - if( i % 2 == 1 ) { - std::cout << "-- " << tokenizedSring[i] << std::endl; - vecIn->push_back(std::stoul(tokenizedSring[i])); - } - } - } -} - -void PyMapper::getStateList( std::string states, std::vector< std::string >* vecIn ) -{ - std::cout << "--- Getting State List --- " << std::endl; -std::cout << states << std::endl; - - // clean the input string - states.erase(remove(states.begin(), states.end(), '['), states.end()); - states.erase(remove(states.begin(), states.end(), ']'), states.end()); - states.erase(states.find("output"), std::string("output").length()); -std::cout << states << std::endl; - - // split into tokens - std::regex delimiter( "," ); - std::sregex_token_iterator iterA(states.begin(), states.end(), delimiter, -1); - std::sregex_token_iterator iterB; - std::vector tokenizedSring( iterA, iterB ); - - std::cout << "***Len of vector " << tokenizedSring.size() << std::endl; - - if( tokenizedSring.size() > 1 ) { - for( uint32_t i = 0; i < tokenizedSring.size(); i++ ) { - std::cout << "+++ " << tokenizedSring[i] << std::endl; - if( i % 2 == 1 ) { - std::cout << "-- " << tokenizedSring[i] << std::endl; - vecIn->push_back(tokenizedSring[i]); - } - } - } -} - -void PyMapper::printDot( std::string fileName, LlyrGraph< ProcessingElement* >* graphIn ) const -{ - std::ofstream outputFile(fileName.c_str(), std::ios::trunc); //open a file for writing (truncate the current contents) - if ( !outputFile ) //check to be sure file is open - std::cerr << "Error opening file."; - - outputFile << "digraph G {" << "\n"; - - std::map< uint32_t, Vertex< ProcessingElement* > >* vertex_map = graphIn->getVertexMap(); - std::map< uint32_t, Vertex >::iterator vertexIterator; - for(vertexIterator = vertex_map->begin(); vertexIterator != vertex_map->end(); ++vertexIterator) { - outputFile << vertexIterator->first << "[label=\""; - opType moop = vertexIterator->second.getValue()->getOpBinding(); - outputFile << vertexIterator->first << " - " << getOpString(moop); - outputFile << "\"];\n"; - } - - for(vertexIterator = vertex_map->begin(); vertexIterator != vertex_map->end(); ++vertexIterator) { - std::vector< Edge* >* adjacencyList = vertexIterator->second.getAdjacencyList(); - - for( auto it = adjacencyList->begin(); it != adjacencyList->end(); ++it ) { - outputFile << vertexIterator->first; - outputFile << "->"; - outputFile << (*it)->getDestination(); - outputFile << "\n"; - } - } - - outputFile << "}"; - outputFile.close(); -} - -}// namespace Llyr -}// namespace SST - -#endif // _PY_MAPPER_H - - diff --git a/src/sst/elements/llyr/mappers/simpleMapper.h b/src/sst/elements/llyr/mappers/simpleMapper.h deleted file mode 100644 index 7f7d956bf7..0000000000 --- a/src/sst/elements/llyr/mappers/simpleMapper.h +++ /dev/null @@ -1,238 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _SIMPLE_MAPPER_H -#define _SIMPLE_MAPPER_H - -#include -#include -#include - -#include "mappers/llyrMapper.h" - -namespace SST { -namespace Llyr { - -class SimpleMapper : public LlyrMapper -{ - -public: - explicit SimpleMapper(Params& params) : - LlyrMapper() {} - ~SimpleMapper() { } - - SST_ELI_REGISTER_MODULE( - SimpleMapper, - "llyr", - "mapper.simple", - SST_ELI_ELEMENT_VERSION(1,0,0), - "App to HW", - SST::Llyr::LlyrMapper - ) - - void mapGraph(LlyrGraph< opType > hardwareGraph, LlyrGraph< AppNode > appGraph, - LlyrGraph< ProcessingElement* > &graphOut, - LlyrConfig* llyr_config); - -private: - - -}; - -void SimpleMapper::mapGraph(LlyrGraph< opType > hardwareGraph, LlyrGraph< AppNode > appGraph, - LlyrGraph< ProcessingElement* > &graphOut, - LlyrConfig* llyr_config) -{ - //setup up i/o for messages - char prefix[256]; - sprintf(prefix, "[t=@t][simpleMapper]: "); - SST::Output* output_ = new SST::Output(prefix, llyr_config->verbosity_, 0, Output::STDOUT); - - std::queue< uint32_t > nodeQueue; - - output_->verbose(CALL_INFO, 32, 0, "Starting mapping\n"); - //Mark all nodes in the PE graph un-visited - std::map< uint32_t, Vertex< AppNode > >* app_vertex_map_ = appGraph.getVertexMap(); - typename std::map< uint32_t, Vertex< AppNode > >::iterator appIterator; - for(appIterator = app_vertex_map_->begin(); appIterator != app_vertex_map_->end(); ++appIterator) { - appIterator->second.setVisited(0); - - output_->verbose(CALL_INFO, 32, 0, "--Vertex %u -- In Degree %u\n", appIterator->first, appIterator->second.getInDegree()); - - if( appIterator->second.getInDegree() == 0 ) { - nodeQueue.push(appIterator->first); - } - } - - //assign new ID to mapped nodes in the graph and track mapping between app and graph - uint32_t newNodeNum = 1; - std::map< uint32_t, uint32_t > mapping; - while( nodeQueue.empty() == 0 ) { - std::stringstream dataOut; - uint32_t currentAppNode = nodeQueue.front(); - nodeQueue.pop(); - - // simple assumes some things about queues - QueueArgMap* arguments = new QueueArgMap; - arguments->emplace( 0, app_vertex_map_->at(currentAppNode).getValue().argument_[0] ); - - app_vertex_map_->at(currentAppNode).setVisited(1); - opType tempOp = app_vertex_map_->at(currentAppNode).getValue().optype_; - if( tempOp == ADDCONST || tempOp == SUBCONST || tempOp == MULCONST || tempOp == DIVCONST || tempOp == REMCONST ) { - addNode( tempOp, arguments, newNodeNum, graphOut, llyr_config ); - } else if( tempOp == INC || tempOp == INC_RST || tempOp == ACC ) { - addNode( tempOp, arguments, newNodeNum, graphOut, llyr_config ); - } else if( tempOp == LDADDR || tempOp == STREAM_LD || tempOp == STADDR || tempOp == STREAM_ST ) { - addNode( tempOp, arguments, newNodeNum, graphOut, llyr_config ); - } else { - addNode( tempOp, newNodeNum, graphOut, llyr_config ); - } - - // create a record of the mapping (new, old) - [[maybe_unused]] auto retVal = mapping.emplace( currentAppNode, newNodeNum ); - output_->verbose(CALL_INFO, 32, 0, "-- Current %" PRIu32 " New %" PRIu32 "\n", currentAppNode, newNodeNum); - output_->verbose(CALL_INFO, 32, 0, "Adjacency list of vertex: %" PRIu32 "\n", currentAppNode); - - // add the destination vertices from this node to the node queue - dataOut << " head"; - std::vector< Edge* >* adjacencyList = app_vertex_map_->at(currentAppNode).getAdjacencyList(); - for( auto it = adjacencyList->begin(); it != adjacencyList->end(); it++ ) { - uint32_t destinationVertex = (*it)->getDestination(); - - if( app_vertex_map_->at(destinationVertex).getVisited() == 0 ) { - dataOut << " -> " << destinationVertex; - app_vertex_map_->at(destinationVertex).setVisited(1); - nodeQueue.push(destinationVertex); - } - else { - dataOut << " +> " << destinationVertex; - } - } - - newNodeNum = newNodeNum + 1; - output_->verbose(CALL_INFO, 32, 0, "%s\n", dataOut.str().c_str()); - } - - // insert dummy as node 0 to make BFS easier - addNode( DUMMY, 0, graphOut, llyr_config ); - - // now add the edges - std::map< uint32_t, Vertex< ProcessingElement* > >* vertex_map_ = graphOut.getVertexMap(); - typename std::map< uint32_t, Vertex< ProcessingElement* > >::iterator vertexIterator; - for(vertexIterator = vertex_map_->begin(); vertexIterator != vertex_map_->end(); ++vertexIterator) { - - // lookup the matched app PE - bool found = 0; - uint32_t appPE; - for( auto it = mapping.begin(); it != mapping.end(); ++it) { - if( it->second == vertexIterator->first ) { - found = 1; - appPE = it->first; - break; - } - } - - if( found == 0 ) { - continue; - } - - // iterate through the adjeceny list of the app graph node and find corresponding mapped-graph node - std::vector< Edge* >* adjacencyList = app_vertex_map_->at(appPE).getAdjacencyList(); - for( auto it = adjacencyList->begin(); it != adjacencyList->end(); it++ ) { - uint32_t destinationVertex = mapping.at((*it)->getDestination()); - graphOut.addEdge( vertexIterator->first, destinationVertex ); - } - - // add edges from the dummy root - output_->verbose(CALL_INFO, 32, 0, "Vertex %" PRIu32 " -- In Degree %" PRIu32 "\n", - vertexIterator->first, vertexIterator->second.getInDegree()); - - if( vertexIterator->second.getInDegree() == 0 ) { - graphOut.addEdge( 0, vertexIterator->first ); - } - } - - //-------------- BFS --------------------------------- - //Mark all nodes in the PE graph un-visited - for(vertexIterator = vertex_map_->begin(); vertexIterator != vertex_map_->end(); ++vertexIterator) { - vertexIterator->second.setVisited(0); - } - - //Node 0 is a dummy node and is always the entry point - nodeQueue.push(0); - - //BFS and add input/output edges - while( nodeQueue.empty() == 0 ) { - uint32_t currentNode = nodeQueue.front(); - nodeQueue.pop(); - std::stringstream dataOut; - - vertex_map_->at(currentNode).setVisited(1); - - output_->verbose(CALL_INFO, 32, 0, "Adjacency list of vertex: %" PRIu32 "\n", currentNode); - std::vector< Edge* >* adjacencyList = vertex_map_->at(currentNode).getAdjacencyList(); - ProcessingElement* srcNode; - ProcessingElement* dstNode; - - //add the destination vertices from this node to the node queue - dataOut << " head"; - for( auto it = adjacencyList->begin(); it != adjacencyList->end(); it++ ) { - uint32_t destinationVertex = (*it)->getDestination(); - - srcNode = vertex_map_->at(currentNode).getValue(); - dstNode = vertex_map_->at(destinationVertex).getValue(); - - dataOut << "\n"; - dataOut << "\tsrcNode " << srcNode->getProcessorId() << "(" << srcNode->getOpBinding() << ")\n"; - dataOut << "\tdstNode " << dstNode->getProcessorId() << "(" << dstNode->getOpBinding() << ")\n"; - output_->verbose(CALL_INFO, 32, 0, "%s\n", dataOut.str().c_str()); - - srcNode->bindOutputQueue(dstNode); - dstNode->bindInputQueue(srcNode); - - if( vertex_map_->at(destinationVertex).getVisited() == 0 ) { - vertex_map_->at(destinationVertex).setVisited(1); - nodeQueue.push(destinationVertex); - } - } - - //FIXME Need to use a fake init on ST for now - opType tempOp = vertex_map_->at(currentNode).getValue()->getOpBinding(); - if( tempOp == ST ) { - vertex_map_->at(currentNode).getValue()->inputQueueInit(); - } else if( tempOp == LDADDR || tempOp == STADDR ) { - vertex_map_->at(currentNode).getValue()->inputQueueInit(); - } else if( tempOp == STREAM_LD || tempOp == STREAM_ST ) { - vertex_map_->at(currentNode).getValue()->inputQueueInit(); - } else if( tempOp == ACC ) { - vertex_map_->at(currentNode).getValue()->inputQueueInit(); - } - } - - //FIXME Fake init for now, need to read values from stack - //Initialize any L/S PEs at the top of the graph - std::vector< Edge* >* rootAdjacencyList = vertex_map_->at(0).getAdjacencyList(); - for( auto it = rootAdjacencyList->begin(); it != rootAdjacencyList->end(); it++ ) { - uint32_t destinationVertex = (*it)->getDestination(); - vertex_map_->at(destinationVertex).getValue()->inputQueueInit(); - } - -}// mapGraph - -}// namespace Llyr -}// namespace SST - -#endif // _SIMPLE_MAPPER_H - diff --git a/src/sst/elements/llyr/parser/instruction.h b/src/sst/elements/llyr/parser/instruction.h deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/src/sst/elements/llyr/parser/parser.cc b/src/sst/elements/llyr/parser/parser.cc deleted file mode 100644 index 5ae6e1a343..0000000000 --- a/src/sst/elements/llyr/parser/parser.cc +++ /dev/null @@ -1,1930 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include "parser.h" - -namespace SST { -namespace Llyr { - -void Parser::generateAppGraph(std::string functionName) -{ - bool foundOffload; - llvm::SMDiagnostic Err; - llvm::LLVMContext Context; - - std::unique_ptr< llvm::MemoryBuffer > irBuff = llvm::MemoryBuffer::getMemBuffer(offloadString_); - std::unique_ptr< llvm::Module > mod(llvm::parseIR(irBuff->getMemBufferRef(), Err, Context)); - mod_ = mod.get(); - - //get names for anonymous instructions - auto pm = std::make_unique(mod_); - pm->add(llvm::createPromoteMemoryToRegisterPass()); - pm->add(llvm::createInstructionNamerPass()); - pm->add(llvm::createIndVarSimplifyPass()); - pm->add(llvm::createLoopUnrollAndJamPass()); - pm->doInitialization(); - - foundOffload = 0; - for( auto functionIter = mod_->getFunctionList().begin(), functionEnd = mod_->getFunctionList().end(); functionIter != functionEnd; ++functionIter ) { - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "Function Name: "; - llvm::errs().write_escaped(functionIter->getName()) << " "; - llvm::errs().write_escaped(llvm::demangle(functionIter->getName().str() )) << '\n'; - } - - //check each located function to see if it's the offload target - if( functionIter->getName().find(functionName) != std::string::npos ) { - pm->run(*functionIter); - - generatebBasicBlockGraph(&*functionIter); - expandBBGraph(&*functionIter); - assembleGraph(); - mergeGraphs(); -// collapseInductionVars(); - - foundOffload = 1; - break; - } - }// function loop - - if( foundOffload == 0 ) { - output_->fatal(CALL_INFO, -1, "Error: No offload target\n"); - exit(0); - } - - output_->verbose(CALL_INFO, 1, 0, "Finished parsing...\n"); - - printCDFG( "00_func-ins.dot" ); - printPyMapper( "00_amapper.dot" ); - -}// generateAppGraph - -void Parser::generatebBasicBlockGraph(llvm::Function* func) -{ - output_->verbose(CALL_INFO, 1, 0, "Generating BB Graph...\n"); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs().write_escaped(llvm::demangle(func->getName().str() )) << '\n'; - } - - for( auto blockIter = func->getBasicBlockList().begin(), blockEnd = func->getBasicBlockList().end(); blockIter != blockEnd; ++blockIter ) { - llvm::Instruction *Inst = llvm::dyn_cast(blockIter->getTerminator()); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "\t+++Basic Block Name(" << &*blockIter << "): "; - llvm::errs().write_escaped(blockIter->getName()) << " ---> " << &*Inst << '\n'; - } - - bbGraph_->addVertex(&*blockIter); - } - - std::map< uint32_t, Vertex< llvm::BasicBlock* > >* vertex_map_ = bbGraph_->getVertexMap(); - typename std::map< uint32_t, Vertex< llvm::BasicBlock* > >::iterator vertexIterator; - for(vertexIterator = vertex_map_->begin(); vertexIterator != vertex_map_->end(); ++vertexIterator) { - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "\nBasic Block " << vertexIterator->second.getValue() << "\n"; - } - - uint32_t totalSuccessors = vertexIterator->second.getValue()->getTerminator()->getNumSuccessors(); - for( uint32_t successor = 0; successor < totalSuccessors; successor++ ) { - llvm::BasicBlock* tempBB = vertexIterator->second.getValue()->getTerminator()->getSuccessor(successor); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "\tSuccessors " << successor << " of " << totalSuccessors << "\n"; - llvm::errs() << "\nBasic Block " << tempBB << "\n"; - } - - typename std::map< uint32_t, Vertex< llvm::BasicBlock* > >::iterator vertexIteratorInner; - for(vertexIteratorInner = vertex_map_->begin(); vertexIteratorInner != vertex_map_->end(); ++vertexIteratorInner) { - if( vertexIteratorInner->second.getValue() == tempBB ) { - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "\t\tFound: " << vertexIteratorInner->second.getValue() << "\n"; - } - - bbGraph_->addEdge(vertexIterator->first, vertexIteratorInner->first); - - break; - } - } - }// successor loop - }// basic block loop - - // bb_Graph should be complete here - output_->verbose(CALL_INFO, 1, 0, "...Basic Block Graph Done.\n"); - bbGraph_->printDot("00_bb.dot"); -}// generatebBasicBlockGraph - - -void Parser::expandBBGraph(llvm::Function* func) -{ - output_->verbose(CALL_INFO, 1, 0, "\n\nGenerating Flow Graph...\n"); - - CDFGVertex* entryVertex; - CDFGVertex* outputVertex; - CDFGVertex* inputVertex; - int32_t inputVertexID = -1; - std::map< llvm::Instruction*, CDFGVertex* >* instructionMap_ = new std::map< llvm::Instruction*, CDFGVertex* >; - - uint32_t tempOpcode; - for( auto blockIter = func->getBasicBlockList().begin(), blockEnd = func->getBasicBlockList().end(); blockIter != blockEnd; ++blockIter ) { - (*flowGraph_)[&*blockIter] = new CDFG; - CDFG &g = *((*flowGraph_)[&*blockIter]); - - (*useNode_)[&*blockIter] = new std::map< CDFGVertex*, std::vector< llvm::Instruction* >* >; - (*defNode_)[&*blockIter] = new std::map< CDFGVertex*, std::vector< llvm::Instruction* >* >; - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "\t+++Basic Block Name(" << &*blockIter << "): "; - llvm::errs().write_escaped(blockIter->getName()) << '\n'; - } - - for( auto instructionIter = blockIter->begin(), instructionEnd = blockIter->end(); instructionIter != instructionEnd; ++instructionIter ) { - tempOpcode = instructionIter->getOpcode(); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "\t\t**(" << &*instructionIter << ") " << *instructionIter << " -- "; - llvm::errs() << "Opcode Name: "; - llvm::errs().write_escaped(instructionIter->getName()) << " "; - llvm::errs().write_escaped(std::to_string(instructionIter->getOpcode())) << "\n"; - } - - outputVertex = new CDFGVertex; - std::string tutu; - llvm::raw_string_ostream rso(tutu); - instructionIter->print(rso); - outputVertex->instructionName_ = rso.str(); - outputVertex->instruction_ = &*instructionIter; - outputVertex->haveConst_ = 0; - outputVertex->intConst_ = 0x00; - outputVertex->floatConst_ = 0x00; - outputVertex->doubleConst_ = 0x00; - - uint32_t outputVertexID = g.addVertex(outputVertex); - (*vertexList_)[&*blockIter].push_back(outputVertex); - - if( g.numVertices() == 1 ) { - entryVertex = outputVertex; - } - - instructionMap_->insert( std::pair< llvm::Instruction*, CDFGVertex* >(&*instructionIter, outputVertex) ); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "-------------------------------------------- Users List --------------------------------------------\n"; - - for( llvm::User *U : instructionIter->users() ) { - if( llvm::Instruction *Inst = llvm::dyn_cast(U) ) { - llvm::errs() << *instructionIter << " is used in instruction:\t"; - llvm::errs() << "(" << &*Inst << ") " << *Inst << "\n"; - } - - } - - llvm::errs() << "----------------------------------------------------------------------------------------------------\n"; - } - - //determine operation - if( tempOpcode == llvm::Instruction::GetElementPtr ) { - std::cout << "R#REWREFDSFDASFA" < *tempUseVector = new std::vector< llvm::Instruction* >; - std::vector< llvm::Instruction* > *tempDefVector = new std::vector< llvm::Instruction* >; - - // create the node/use entries (this should be empty) - (*useNode_)[&*blockIter]->insert( std::pair< CDFGVertex*, std::vector< llvm::Instruction* >* >(outputVertex, tempUseVector) ); - - //create the node/def entries - tempDefVector->push_back(&*instructionIter); - (*defNode_)[&*blockIter]->insert( std::pair< CDFGVertex*, std::vector< llvm::Instruction* >* >(outputVertex, tempDefVector) ); - - for( auto nodeUseEntry = (*useNode_)[&*blockIter]->at(outputVertex)->begin(); nodeUseEntry != (*useNode_)[&*blockIter]->at(outputVertex)->end(); nodeUseEntry++ ) { - CDFGVertex* tempVal = g.getVertex(outputVertexID)->getValue(); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "Node-Use Entry (" << tempVal->instruction_ << "): " << *nodeUseEntry << "\n"; - } - } - - for( auto nodeDefEntry = (*defNode_)[&*blockIter]->at(outputVertex)->begin(); nodeDefEntry != (*defNode_)[&*blockIter]->at(outputVertex)->end(); nodeDefEntry++ ) { - CDFGVertex* tempVal = g.getVertex(outputVertexID)->getValue(); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "Node-Def Entry (" << tempVal->instruction_ << "): " << *nodeDefEntry << "\n"; - } - } - - // END Allocate - } else if( tempOpcode == llvm::Instruction::Ret ) { // BEGIN Return - std::vector< llvm::Instruction* > *tempUseVector = new std::vector< llvm::Instruction* >; - - llvm::Value* tempOperand = llvm::cast(instructionIter)->getReturnValue(); - - // Test for ret val - // If a function returns void, the value returned is a null pointer - if( tempOperand == 0x00) { - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(outputVertex, new std::vector< llvm::Instruction* >) ); - - //create the node/def entries - (*defNode_)[&*blockIter]->insert( std::pair* >(outputVertex, new std::vector< llvm::Instruction* >) ); - } else if( llvm::isa(tempOperand) || llvm::isa(tempOperand) || llvm::isa(tempOperand) ) { - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(outputVertex, new std::vector< llvm::Instruction* >) ); - - //create the node/def entries - (*defNode_)[&*blockIter]->insert( std::pair* >(outputVertex, new std::vector< llvm::Instruction* >) ); - } else if( llvm::isa(tempOperand) ) { - std::map< llvm::Instruction*,CDFGVertex* >::iterator it = instructionMap_->find(llvm::cast(tempOperand)); - if( it != instructionMap_->end() ) { - inputVertex = instructionMap_->at(llvm::cast(tempOperand)); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "+Found " << inputVertex->instruction_ << " in instructionMap_\n"; - } - } else { - inputVertex = new CDFGVertex; - inputVertex->instruction_ = 0x00; - inputVertex->haveConst_ = 0; - inputVertex->intConst_ = 0x00; - inputVertex->floatConst_ = 0x00; - inputVertex->doubleConst_ = 0x00; - - inputVertexID = g.addVertex(inputVertex); - (*vertexList_)[&*blockIter].push_back(inputVertex); - - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - - //create the node/def entries - (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - } - - //add variable to node use list - tempUseVector->push_back(llvm::cast(tempOperand)); - - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(outputVertex, tempUseVector) ); - - //create the node/def entries - (*defNode_)[&*blockIter]->insert( std::pair* >(outputVertex, new std::vector< llvm::Instruction* >) ); - } - - if( output_->getVerboseLevel() > 64 ) { - for( auto nodeUseEntry = (*useNode_)[&*blockIter]->at(outputVertex)->begin(); nodeUseEntry != (*useNode_)[&*blockIter]->at(outputVertex)->end(); nodeUseEntry++ ) { - llvm::errs() << "Node-Use Entry (" << outputVertex->instruction_ << "): " << *nodeUseEntry << "\n"; - } - - for( auto nodeDefEntry = (*defNode_)[&*blockIter]->at(outputVertex)->begin(); nodeDefEntry != (*defNode_)[&*blockIter]->at(outputVertex)->end(); nodeDefEntry++ ) { - llvm::errs() << "Node-Def Entry (" << outputVertex->instruction_ << "): " << *nodeDefEntry << "\n"; - } - } - - // END Return - } else if( tempOpcode == llvm::Instruction::Call ) { // BEGIN Call - - std::vector< llvm::Instruction* > *tempUseVector = new std::vector< llvm::Instruction* >; - std::vector< llvm::Instruction* > *tempDefVector = new std::vector< llvm::Instruction* >; - - for( auto operandIter = instructionIter->op_begin(), operandEnd = instructionIter->op_end(); operandIter != operandEnd; ++operandIter ) { - llvm::Value* tempOperand = operandIter->get(); - - if( llvm::isa(tempOperand) ) { - // Don't care about these args at the moment - - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - - //create the node/def entries - (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - - } else if( llvm::isa(tempOperand) ) { - std::map< llvm::Instruction*,CDFGVertex* >::iterator it = instructionMap_->find(llvm::cast(tempOperand)); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "**** " << static_cast(operandIter->get()) << " -- " << llvm::cast(tempOperand) << " -- "; - } - - - if( it != instructionMap_->end() ) { - inputVertex = instructionMap_->at(llvm::cast(tempOperand)); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "+Found " << inputVertex->instruction_ << " in instructionMap_\n"; - } - - } else { - inputVertex = new CDFGVertex; - inputVertex->instruction_ = 0x00; - inputVertex->haveConst_ = 0; - inputVertex->intConst_ = 0x00; - inputVertex->floatConst_ = 0x00; - inputVertex->doubleConst_ = 0x00; - - inputVertexID = g.addVertex(inputVertex); - (*vertexList_)[&*blockIter].push_back(inputVertex); - - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - - //create the node/def entries - (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - } - - //add variable to node use list - tempUseVector->push_back(llvm::cast(tempOperand)); - } - } - - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(outputVertex, tempUseVector) ); - - //create the node/def entries - tempDefVector->push_back(&*instructionIter); - (*defNode_)[&*blockIter]->insert( std::pair* >(outputVertex, tempDefVector) ); - - if( output_->getVerboseLevel() > 64 ) { - for( auto nodeUseEntry = (*useNode_)[&*blockIter]->at(outputVertex)->begin(); nodeUseEntry != (*useNode_)[&*blockIter]->at(outputVertex)->end(); nodeUseEntry++ ) { - llvm::errs() << "Node-Use Entry (" << outputVertex->instruction_ << "): " << *nodeUseEntry << "\n"; - } - - for( auto nodeDefEntry = (*defNode_)[&*blockIter]->at(outputVertex)->begin(); nodeDefEntry != (*defNode_)[&*blockIter]->at(outputVertex)->end(); nodeDefEntry++ ) { - llvm::errs() << "Node-Def Entry (" << outputVertex->instruction_ << "): " << *nodeDefEntry << "\n"; - } - } - - //END Call - } else if( tempOpcode == llvm::Instruction::Br ) { // BEGIN Branch - std::vector< llvm::Instruction* > *tempUseVector = new std::vector< llvm::Instruction* >; - std::vector< llvm::Instruction* > *tempDefVector = new std::vector< llvm::Instruction* >; - - if(llvm::cast(instructionIter)->isConditional() ) { - llvm::Value* tempCond =llvm::cast(instructionIter)->getCondition(); - - std::map< llvm::Instruction*,CDFGVertex* >::iterator it = instructionMap_->find(llvm::cast(tempCond)); - if( it != instructionMap_->end() ) { - inputVertex = instructionMap_->at(llvm::cast(tempCond)); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "+src Found " << inputVertex->instruction_ << " in instructionMap_\n"; - } - } else { - inputVertex = new CDFGVertex; - inputVertex->instruction_ = 0x00; - inputVertex->haveConst_ = 0; - inputVertex->intConst_ = 0x00; - inputVertex->floatConst_ = 0x00; - inputVertex->doubleConst_ = 0x00; - - inputVertexID = g.addVertex(inputVertex); - (*vertexList_)[&*blockIter].push_back(inputVertex); - - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - - //create the node/def entries - (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - } - - //add variable to node use list - tempUseVector->push_back(llvm::cast(tempCond)); - } - - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(outputVertex, tempUseVector) ); - - //create the node/def entries - (*defNode_)[&*blockIter]->insert( std::pair* >(outputVertex, tempDefVector) ); - - if( output_->getVerboseLevel() > 64 ) { - for( auto nodeUseEntry = (*useNode_)[&*blockIter]->at(outputVertex)->begin(); nodeUseEntry != (*useNode_)[&*blockIter]->at(outputVertex)->end(); nodeUseEntry++ ) { - llvm::errs() << "Node-Use Entry (" << outputVertex->instruction_ << "): " << *nodeUseEntry << "\n"; - } - - for( auto nodeDefEntry = (*defNode_)[&*blockIter]->at(outputVertex)->begin(); nodeDefEntry != (*defNode_)[&*blockIter]->at(outputVertex)->end(); nodeDefEntry++ ) { - llvm::errs() << "Node-Def Entry (" << outputVertex->instruction_ << "): " << *nodeDefEntry << "\n"; - } - } - - // END Branch - } else if( tempOpcode == llvm::Instruction::Load ) { // BEGIN Load - std::vector< llvm::Instruction* > *tempUseVector = new std::vector< llvm::Instruction* >; - std::vector< llvm::Instruction* > *tempDefVector = new std::vector< llvm::Instruction* >; - - llvm::Value* tempSrc = llvm::cast(instructionIter)->getPointerOperand(); - uint32_t alignment = llvm::cast(instructionIter)->getAlignment(); - - //Get src information - if( llvm::isa(tempSrc) ) { - std::map< llvm::Instruction*,CDFGVertex* >::iterator it = instructionMap_->find(llvm::cast(tempSrc)); - if( it != instructionMap_->end() ) { - inputVertex = instructionMap_->at(llvm::cast(tempSrc)); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "+src Found " << inputVertex->instruction_ << " in instructionMap_\n"; - } - } else { - - outputVertex->haveConst_ = 1; - outputVertex->intConst_ = alignment; - outputVertex->floatConst_ = 0x00; - outputVertex->doubleConst_ = 0x00; - -// inputVertex = new CDFGVertex; -// inputVertex->instruction_ = 0x00; -// inputVertex->haveConst_ = 1; -// inputVertex->intConst_ = alignment; -// inputVertex->floatConst_ = 0x00; -// inputVertex->doubleConst_ = 0x00; - -// inputVertexID = g.addVertex(inputVertex); -// (*vertexList_)[&*blockIter].push_back(inputVertex); -// -// // create the node/use entries -// (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); -// -// //create the node/def entries -// (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - } - - //add variable to node use list - tempUseVector->push_back(llvm::cast(tempSrc)); - } else { - //TODO fix arguments as source - - outputVertex->haveConst_ = 1; - outputVertex->intConst_ = alignment; - outputVertex->floatConst_ = 0x00; - outputVertex->doubleConst_ = 0x00; - outputVertex->valueName_ = tempSrc->getName().str(); - -// inputVertex = new CDFGVertex; -// inputVertex->instruction_ = 0x00; -// inputVertex->haveConst_ = 1; -// inputVertex->intConst_ = alignment; -// inputVertex->floatConst_ = 0x00; -// inputVertex->doubleConst_ = 0x00; -// inputVertex->valueName_ = tempSrc->getName().str(); - -// inputVertexID = g.addVertex(inputVertex); -// g.addEdge(inputVertexID, outputVertexID); -// // if(inserted) -// // { -// // g[edgeDesc].value_t = tempSrc; -// // } -// -// (*vertexList_)[&*blockIter].push_back(inputVertex); -// -// // create the node/use entries -// (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); -// -// //create the node/def entries -// (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - - }//end src - - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(outputVertex, tempUseVector) ); - - //create the node/def entries - tempDefVector->push_back(&*instructionIter); - (*defNode_)[&*blockIter]->insert( std::pair* >(outputVertex, tempDefVector) ); - - if( output_->getVerboseLevel() > 64 ) { - for( auto nodeUseEntry = (*useNode_)[&*blockIter]->at(outputVertex)->begin(); nodeUseEntry != (*useNode_)[&*blockIter]->at(outputVertex)->end(); nodeUseEntry++ ) { - llvm::errs() << "Node-Use Entry (" << outputVertex->instruction_ << "): " << *nodeUseEntry << "\n"; - } - - for( auto nodeDefEntry = (*defNode_)[&*blockIter]->at(outputVertex)->begin(); nodeDefEntry != (*defNode_)[&*blockIter]->at(outputVertex)->end(); nodeDefEntry++ ) { - llvm::errs() << "Node-Def Entry (" << outputVertex->instruction_ << "): " << *nodeDefEntry << "\n"; - } - } - - //END Load - } else if( tempOpcode == llvm::Instruction::Store ) { // BEGIN Store - std::vector< llvm::Instruction* > *tempUseVector = new std::vector< llvm::Instruction* >; - std::vector< llvm::Instruction* > *tempDefVector = new std::vector< llvm::Instruction* >; - - llvm::Value* tempDst = llvm::cast(instructionIter)->getPointerOperand(); - llvm::Value* tempSrc = llvm::cast(instructionIter)->getValueOperand(); - - //Get destination dependency - if( llvm::isa(tempDst) ) { - std::map< llvm::Instruction*,CDFGVertex* >::iterator it = instructionMap_->find(llvm::cast(tempDst)); - if( it != instructionMap_->end() ) { - inputVertex = instructionMap_->at(llvm::cast(tempDst)); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "+dst Found " << inputVertex->instruction_ << " in instructionMap_\n"; - } - } else { - inputVertex = new CDFGVertex; - inputVertex->instruction_ = 0x00; - inputVertex->haveConst_ = 0; - inputVertex->intConst_ = 0x00; - inputVertex->floatConst_ = 0x00; - inputVertex->doubleConst_ = 0x00; - - inputVertexID = g.addVertex(inputVertex); - (*vertexList_)[&*blockIter].push_back(inputVertex); - - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - - //create the node/def entries - (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - } - - //add variable to node use list - tempUseVector->push_back(llvm::cast(tempDst)); - - } else if( llvm::isa(tempDst) ) { - - outputVertex->haveConst_ = 1; - outputVertex->intConst_ = 0xFF; - outputVertex->floatConst_ = 0x00; - outputVertex->doubleConst_ = 0x00; - outputVertex->valueName_ = tempSrc->getName().str(); - -// inputVertex = new CDFGVertex; -// inputVertex->instruction_ = 0x00; -// inputVertex->haveConst_ = 1; -// inputVertex->intConst_ = 0xFF; -// inputVertex->floatConst_ = 0x00; -// inputVertex->doubleConst_ = 0x00; -// inputVertex->valueName_ = tempSrc->getName().str(); -// -// inputVertexID = g.addVertex(inputVertex); -// ParserEdgeProperties* edgeProp = new ParserEdgeProperties; -// edgeProp->value_ = llvm::cast(instructionIter)->getValueOperand(); -// g.addEdge(inputVertexID, outputVertexID, edgeProp); -// -// (*vertexList_)[&*blockIter].push_back(inputVertex); -// -// // create the node/use entries -// (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); -// -// //create the node/def entries -// (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - - } else { - if( llvm::isa(tempDst) ) { // signed/unsigned ints - llvm::ConstantInt* tempConst = llvm::cast(tempDst); - - outputVertex->instruction_ = 0x00; - outputVertex->haveConst_ = 1; - outputVertex->intConst_ = tempConst->getSExtValue(); - outputVertex->floatConst_ = 0x00; - outputVertex->doubleConst_ = 0x00; - -// inputVertex = new CDFGVertex; -// inputVertex->instruction_ = 0x00; -// inputVertex->haveConst_ = 1; -// inputVertex->intConst_ = tempConst->getSExtValue(); -// inputVertex->floatConst_ = 0x00; -// inputVertex->doubleConst_ = 0x00; -// -// inputVertexID = g.addVertex(inputVertex); -// (*vertexList_)[&*blockIter].push_back(inputVertex); -// -// // Insert edge for const here since we can't discover it when we walk the graph -// ParserEdgeProperties* edgeProp = new ParserEdgeProperties; -// edgeProp->value_ = 0x00; -// g.addEdge(inputVertexID, outputVertexID, edgeProp); -// -// // create the node/use entries -// (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); -// -// //create the node/def entries -// (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - - } else if( llvm::isa(tempDst) ) { // floats and doubles - llvm::ConstantFP* tempConst = llvm::cast(tempDst); - - outputVertex = new CDFGVertex; - outputVertex->instruction_ = 0x00; - outputVertex->haveConst_ = 1; - outputVertex->intConst_ = 0x00; - if(tempDst->getType()->isFloatTy()) { - outputVertex->doubleConst_ = (double) tempConst->getValueAPF().convertToFloat(); - } else { - outputVertex->doubleConst_ = tempConst->getValueAPF().convertToDouble(); - } - -// inputVertex = new CDFGVertex; -// inputVertex->instruction_ = 0x00; -// inputVertex->haveConst_ = 1; -// inputVertex->intConst_ = 0x00; -// if(tempDst->getType()->isFloatTy()) { -// inputVertex->doubleConst_ = (double) tempConst->getValueAPF().convertToFloat(); -// } else { -// inputVertex->doubleConst_ = tempConst->getValueAPF().convertToDouble(); -// } -// -// inputVertexID = g.addVertex(inputVertex); -// (*vertexList_)[&*blockIter].push_back(inputVertex); -// -// // Insert edge for const here since we can't discover it when we walk the graph -// ParserEdgeProperties* edgeProp = new ParserEdgeProperties; -// edgeProp->value_ = 0x00; -// g.addEdge(inputVertexID, outputVertexID, edgeProp); -// -// // create the node/use entries -// (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); -// -// //create the node/def entries -// (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - - } else { - inputVertex = new CDFGVertex; - inputVertex->instruction_ = 0x00; - inputVertex->haveConst_ = 0; - inputVertex->intConst_ = 0x00; - inputVertex->floatConst_ = 0x00; - inputVertex->doubleConst_ = 0x00; - - inputVertexID = g.addVertex(inputVertex); - (*vertexList_)[&*blockIter].push_back(inputVertex); - - // Insert edge for const here since we can't discover it when we walk the graph - ParserEdgeProperties* edgeProp = new ParserEdgeProperties; - edgeProp->value_ = 0x00; - g.addEdge(inputVertexID, outputVertexID, edgeProp); - - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - - //create the node/def entries - (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - } - - }// END dst dep check - - //Get source dependency - if( llvm::isa(tempSrc) ) { - std::map< llvm::Instruction*,CDFGVertex* >::iterator it = instructionMap_->find(llvm::cast(tempSrc)); - if( it != instructionMap_->end() ) { - inputVertex = instructionMap_->at(llvm::cast(tempSrc)); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "+src Found " << inputVertex->instruction_ << " in instructionMap_\n"; - } - } else { -// inputVertex = new CDFGVertex; -// inputVertex->instruction_ = 0x00; -// inputVertex->haveConst_ = 0; -// inputVertex->intConst_ = 0x00; -// inputVertex->floatConst_ = 0x00; -// inputVertex->doubleConst_ = 0x00; -// -// inputVertexID = g.addVertex(inputVertex); -// (*vertexList_)[&*blockIter].push_back(inputVertex); -// -// // create the node/use entries -// (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); -// -// //create the node/def entries -// (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - } - - //add variable to node use list - tempUseVector->push_back(llvm::cast(tempSrc)); - } else if( llvm::isa(tempSrc) ) { - - outputVertex->haveConst_ = 1; - outputVertex->intConst_ = 0xFF; - outputVertex->floatConst_ = 0x00; - outputVertex->doubleConst_ = 0x00; - outputVertex->valueName_ = tempSrc->getName().str(); - -// inputVertex = new CDFGVertex; -// inputVertex->instruction_ = 0x00; -// inputVertex->haveConst_ = 1; -// inputVertex->intConst_ = 0xFF; -// inputVertex->floatConst_ = 0x00; -// inputVertex->doubleConst_ = 0x00; -// inputVertex->valueName_ = tempSrc->getName().str(); - -// inputVertexID = g.addVertex(inputVertex); -// (*vertexList_)[&*blockIter].push_back(inputVertex); -// -// ParserEdgeProperties* edgeProp = new ParserEdgeProperties; -// edgeProp->value_ = llvm::cast(instructionIter)->getValueOperand(); -// g.addEdge(inputVertexID, outputVertexID, edgeProp); -// -// // create the node/use entries -// (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); -// -// //create the node/def entries -// (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - - } else { - if( llvm::isa(tempSrc) ) { // signed/unsigned ints - llvm::ConstantInt* tempConst = llvm::cast(tempSrc); - - outputVertex->haveConst_ = 1; - outputVertex->intConst_ = tempConst->getSExtValue(); - outputVertex->floatConst_ = 0x00; - outputVertex->doubleConst_ = 0x00; - -// inputVertex = new CDFGVertex; -// inputVertex->instruction_ = 0x00; -// inputVertex->haveConst_ = 1; -// inputVertex->intConst_ = tempConst->getSExtValue(); -// inputVertex->floatConst_ = 0x00; -// inputVertex->doubleConst_ = 0x00; - -// inputVertexID = g.addVertex(inputVertex); -// (*vertexList_)[&*blockIter].push_back(inputVertex); -// -// // Insert edge for const here since we can't discover it when we walk the graph -// ParserEdgeProperties* edgeProp = new ParserEdgeProperties; -// edgeProp->value_ = 0x00; -// g.addEdge(inputVertexID, outputVertexID, edgeProp); -// -// // create the node/use entries -// (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); -// -// //create the node/def entries -// (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - - } else if( llvm::isa(tempSrc) ) { // floats and doubles - llvm::ConstantFP* tempConst = llvm::cast(tempSrc); - - outputVertex->haveConst_ = 1; - outputVertex->intConst_ = 0x00; - if(tempSrc->getType()->isFloatTy()) { - outputVertex->doubleConst_ = (double) tempConst->getValueAPF().convertToFloat(); - } else { - outputVertex->doubleConst_ = tempConst->getValueAPF().convertToDouble(); - } - -// inputVertex = new CDFGVertex; -// inputVertex->instruction_ = 0x00; -// inputVertex->haveConst_ = 1; -// inputVertex->intConst_ = 0x00; -// // if(tempSrc->getType()->isFloatTy()) -// // inputVertex->floatConst = tempConst->getValueAPF().convertToFloat(); -// // else -// inputVertex->doubleConst_ = tempConst->getValueAPF().convertToDouble(); - -// inputVertexID = g.addVertex(inputVertex); -// (*vertexList_)[&*blockIter].push_back(inputVertex); -// -// // Insert edge for const here since we can't discover it when we walk the graph -// ParserEdgeProperties* edgeProp = new ParserEdgeProperties; -// edgeProp->value_ = 0x00; -// g.addEdge(inputVertexID, outputVertexID, edgeProp); -// -// // create the node/use entries -// (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); -// -// //create the node/def entries -// (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - } else { - inputVertex = new CDFGVertex; - inputVertex->instruction_ = 0x00; - inputVertex->haveConst_ = 0; - inputVertex->intConst_ = 0x00; - inputVertex->floatConst_ = 0x00; - inputVertex->doubleConst_ = 0x00; - - inputVertexID = g.addVertex(inputVertex); - (*vertexList_)[&*blockIter].push_back(inputVertex); - - // Insert edge for const here since we can't discover it when we walk the graph - ParserEdgeProperties* edgeProp = new ParserEdgeProperties; - edgeProp->value_ = 0x00; - g.addEdge(inputVertexID, outputVertexID, edgeProp); - - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - - //create the node/def entries - (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - } - - }// END src dep check - - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(outputVertex, tempUseVector) ); - - //create the node/def entries - tempDefVector->push_back(&*instructionIter); - (*defNode_)[&*blockIter]->insert( std::pair* >(outputVertex, tempDefVector) ); - - if( output_->getVerboseLevel() > 64 ) { - for( auto nodeUseEntry = (*useNode_)[&*blockIter]->at(outputVertex)->begin(); nodeUseEntry != (*useNode_)[&*blockIter]->at(outputVertex)->end(); nodeUseEntry++ ) { - llvm::errs() << "Node-Use Entry (" << outputVertex->instruction_ << "): " << *nodeUseEntry << "\n"; - } - - for( auto nodeDefEntry = (*defNode_)[&*blockIter]->at(outputVertex)->begin(); nodeDefEntry != (*defNode_)[&*blockIter]->at(outputVertex)->end(); nodeDefEntry++ ) { - llvm::errs() << "Node-Def Entry (" << outputVertex->instruction_ << "): " << *nodeDefEntry << "\n"; - } - } - - llvm::errs() << "???????????? STORE END\n"; - - //END Store - } else if( tempOpcode == llvm::Instruction::GetElementPtr ){ // BEGIN GEP - std::vector< llvm::Instruction* > *tempUseVector = new std::vector< llvm::Instruction* >; - std::vector< llvm::Instruction* > *tempDefVector = new std::vector< llvm::Instruction* >; - - for( auto operandIter = instructionIter->op_begin(), operandEnd = instructionIter->op_end(); operandIter != operandEnd; ++operandIter ) { - llvm::Value* tempOperand = operandIter->get(); - - if( llvm::isa(tempOperand) ) { - if( llvm::isa(tempOperand) ) { // signed/unsigned ints - llvm::ConstantInt* tempConst = llvm::cast(tempOperand); - - outputVertex->haveConst_ = 1; - outputVertex->intConst_ = tempConst->getSExtValue(); - outputVertex->floatConst_ = 0x00; - outputVertex->doubleConst_ = 0x00; - -// inputVertex = new CDFGVertex; -// inputVertex->instruction_ = 0x00; -// inputVertex->haveConst_ = 1; -// inputVertex->intConst_ = tempConst->getSExtValue(); -// inputVertex->floatConst_ = 0x00; -// inputVertex->doubleConst_ = 0x00; - -// inputVertexID = g.addVertex(inputVertex); -// (*vertexList_)[&*blockIter].push_back(inputVertex); -// -// // Insert edge for const here since we can't discover it when we walk the graph -// ParserEdgeProperties* edgeProp = new ParserEdgeProperties; -// edgeProp->value_ = 0x00; -// g.addEdge(inputVertexID, outputVertexID, edgeProp); -// -// // create the node/use entries -// (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); -// -// //create the node/def entries -// (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - - } else if( llvm::isa(tempOperand) ) { // floats and doubles - llvm::ConstantFP* tempConst = llvm::cast(tempOperand); - - outputVertex->haveConst_ = 1; - outputVertex->intConst_ = 0x00; - if(tempOperand->getType()->isFloatTy()) { - outputVertex->doubleConst_ = (double) tempConst->getValueAPF().convertToFloat(); - } else { - outputVertex->doubleConst_ = tempConst->getValueAPF().convertToDouble(); - } - -// inputVertex = new CDFGVertex; -// inputVertex->instruction_ = 0x00; -// inputVertex->haveConst_ = 1; -// inputVertex->intConst_ = 0x00; -// // if(tempOperand->getType()->isFloatTy()) -// // inputVertex->floatConst = tempConst->getValueAPF().convertToFloat(); -// // else -// inputVertex->doubleConst_ = tempConst->getValueAPF().convertToDouble(); - -// inputVertexID = g.addVertex(inputVertex); -// (*vertexList_)[&*blockIter].push_back(inputVertex); -// -// // Insert edge for const here since we can't discover it when we walk the graph -// ParserEdgeProperties* edgeProp = new ParserEdgeProperties; -// edgeProp->value_ = 0x00; -// g.addEdge(inputVertexID, outputVertexID, edgeProp); -// -// // create the node/use entries -// (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); -// -// //create the node/def entries -// (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - } - - } else if( llvm::isa(tempOperand) ) { - std::map< llvm::Instruction*,CDFGVertex* >::iterator it = instructionMap_->find(llvm::cast(tempOperand)); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "**** " << static_cast(operandIter->get()) << " -- " << - llvm::cast(tempOperand) << " -- "; - } - - if( it != instructionMap_->end() ) { - inputVertex = instructionMap_->at(llvm::cast(tempOperand)); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "+Found " << inputVertex->instruction_ << " in instructionMap_\n"; - } - } else { - inputVertex = new CDFGVertex; - inputVertex->instruction_ = 0x00; - inputVertex->haveConst_ = 0; - inputVertex->intConst_ = 0x00; - inputVertex->floatConst_ = 0x00; - inputVertex->doubleConst_ = 0x00; - - inputVertexID = g.addVertex(inputVertex); - (*vertexList_)[&*blockIter].push_back(inputVertex); - - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - - //create the node/def entries - (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - } - - //add variable to node use list - tempUseVector->push_back(llvm::cast(tempOperand)); - } - } - - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(outputVertex, tempUseVector) ); - - //create the node/def entries - tempDefVector->push_back(&*instructionIter); - (*defNode_)[&*blockIter]->insert( std::pair* >(outputVertex, tempDefVector) ); - - if( output_->getVerboseLevel() > 64 ) { - for( auto nodeUseEntry = (*useNode_)[&*blockIter]->at(outputVertex)->begin(); nodeUseEntry != (*useNode_)[&*blockIter]->at(outputVertex)->end(); nodeUseEntry++ ) { - llvm::errs() << "Node-Use Entry (" << outputVertex->instruction_ << "): " << *nodeUseEntry << "\n"; - } - - for( auto nodeDefEntry = (*defNode_)[&*blockIter]->at(outputVertex)->begin(); nodeDefEntry != (*defNode_)[&*blockIter]->at(outputVertex)->end(); nodeDefEntry++ ) { - llvm::errs() << "Node-Def Entry (" << outputVertex->instruction_ << "): " << *nodeDefEntry << "\n"; - } - } - - //END GEP - } else if( tempOpcode == llvm::Instruction::ICmp || tempOpcode == llvm::Instruction::FCmp ) { // BEGIN Int/Float Compare - std::vector< llvm::Instruction* > *tempUseVector = new std::vector< llvm::Instruction* >; - std::vector< llvm::Instruction* > *tempDefVector = new std::vector< llvm::Instruction* >; - - for( auto operandIter = instructionIter->op_begin(), operandEnd = instructionIter->op_end(); operandIter != operandEnd; ++operandIter ) { - llvm::Value* tempOperand = operandIter->get(); - - if( llvm::isa(tempOperand) ) { - if( llvm::isa(tempOperand) ) { // signed/unsigned ints - llvm::ConstantInt* tempConst = llvm::cast(tempOperand); - - outputVertex->haveConst_ = 1; - outputVertex->intConst_ = tempConst->getSExtValue(); - outputVertex->floatConst_ = 0x00; - outputVertex->doubleConst_ = 0x00; - -// inputVertex = new CDFGVertex; -// inputVertex->instruction_ = 0x00; -// inputVertex->haveConst_ = 1; -// inputVertex->intConst_ = tempConst->getSExtValue(); -// inputVertex->floatConst_ = 0x00; -// inputVertex->doubleConst_ = 0x00; - -// inputVertexID = g.addVertex(inputVertex); -// (*vertexList_)[&*blockIter].push_back(inputVertex); -// -// // Insert edge for const here since we can't discover it when we walk the graph -// ParserEdgeProperties* edgeProp = new ParserEdgeProperties; -// edgeProp->value_ = 0x00; -// g.addEdge(inputVertexID, outputVertexID, edgeProp); -// -// // create the node/use entries -// (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); -// -// //create the node/def entries -// (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - - } else if( llvm::isa(tempOperand) ) { // floats and doubles - llvm::ConstantFP* tempConst = llvm::cast(tempOperand); - - outputVertex->haveConst_ = 1; - outputVertex->intConst_ = 0x00; - if(tempOperand->getType()->isFloatTy()) { - outputVertex->doubleConst_ = (double) tempConst->getValueAPF().convertToFloat(); - } else { - outputVertex->doubleConst_ = tempConst->getValueAPF().convertToDouble(); - } - -// inputVertex = new CDFGVertex; -// inputVertex->instruction_ = 0x00; -// inputVertex->haveConst_ = 1; -// inputVertex->intConst_ = 0x00; -// // if(tempOperand->getType()->isFloatTy()) -// // inputVertex->floatConst = tempConst->getValueAPF().convertToFloat(); -// // else -// inputVertex->doubleConst_ = tempConst->getValueAPF().convertToDouble(); - -// inputVertexID = g.addVertex(inputVertex); -// (*vertexList_)[&*blockIter].push_back(inputVertex); -// -// // Insert edge for const here since we can't discover it when we walk the graph -// ParserEdgeProperties* edgeProp = new ParserEdgeProperties; -// edgeProp->value_ = 0x00; -// g.addEdge(inputVertexID, outputVertexID, edgeProp); -// -// // create the node/use entries -// (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); -// -// //create the node/def entries -// (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - } - } else if( llvm::isa(tempOperand) ) { - std::map< llvm::Instruction*,CDFGVertex* >::iterator it = instructionMap_->find(llvm::cast(tempOperand)); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "**** " << static_cast(operandIter->get()) << " -- " << llvm::cast(tempOperand) << " -- "; - } - - if( it != instructionMap_->end() ) { - inputVertex = instructionMap_->at(llvm::cast(tempOperand)); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "+Found " << inputVertex->instruction_ << " in instructionMap_\n"; - } - } else { - inputVertex = new CDFGVertex; - inputVertex->instruction_ = 0x00; - inputVertex->haveConst_ = 0; - inputVertex->intConst_ = 0x00; - inputVertex->floatConst_ = 0x00; - inputVertex->doubleConst_ = 0x00; - - inputVertexID = g.addVertex(inputVertex); - (*vertexList_)[&*blockIter].push_back(inputVertex); - - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - - //create the node/def entries - (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - } - - //add variable to node use list - tempUseVector->push_back(llvm::cast(tempOperand)); - } else if( llvm::isa(tempOperand) || llvm::isa(tempOperand) ) { - - outputVertex->haveConst_ = 1; - outputVertex->intConst_ = 0xFF; - outputVertex->floatConst_ = 0x00; - outputVertex->doubleConst_ = 0x00; - -// inputVertex = new CDFGVertex; -// inputVertex->instruction_ = 0x00; -// inputVertex->haveConst_ = 1; -// inputVertex->intConst_ = 0xFF; -// inputVertex->floatConst_ = 0x00; -// inputVertex->doubleConst_ = 0x00; - -// inputVertexID = g.addVertex(inputVertex); -// (*vertexList_)[&*blockIter].push_back(inputVertex); -// -// ParserEdgeProperties* edgeProp = new ParserEdgeProperties; -// edgeProp->value_ = 0x00; -// g.addEdge(inputVertexID, outputVertexID, edgeProp); -// -// // create the node/use entries -// (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); -// -// //create the node/def entries -// (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - } - } - - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(outputVertex, tempUseVector) ); - - //create the node/def entries - tempDefVector->push_back(&*instructionIter); - (*defNode_)[&*blockIter]->insert( std::pair* >(outputVertex, tempDefVector) ); - - if( output_->getVerboseLevel() > 64 ) { - for( auto nodeUseEntry = (*useNode_)[&*blockIter]->at(outputVertex)->begin(); nodeUseEntry != (*useNode_)[&*blockIter]->at(outputVertex)->end(); nodeUseEntry++ ) { - llvm::errs() << "Node-Use Entry (" << outputVertex->instruction_ << "): " << *nodeUseEntry << "\n"; - } - - for( auto nodeDefEntry = (*defNode_)[&*blockIter]->at(outputVertex)->begin(); nodeDefEntry != (*defNode_)[&*blockIter]->at(outputVertex)->end(); nodeDefEntry++ ) { - llvm::errs() << "Node-Def Entry (" << outputVertex->instruction_ << "): " << *nodeDefEntry << "\n"; - } - } - - // END Int/Float Compare - } else if( llvm::Instruction::isCast(tempOpcode) ) { // BEGIN llvm::cast (Instruction.def) - std::vector< llvm::Instruction* > *tempUseVector = new std::vector< llvm::Instruction* >; - std::vector< llvm::Instruction* > *tempDefVector = new std::vector< llvm::Instruction* >; - - for( auto operandIter = instructionIter->op_begin(), operandEnd = instructionIter->op_end(); operandIter != operandEnd; ++operandIter ) { - llvm::Value* tempOperand = operandIter->get(); - - if( llvm::isa(tempOperand) ) { - if( llvm::isa(tempOperand) ) { // signed/unsigned ints - llvm::ConstantInt* tempConst = llvm::cast(tempOperand); - - outputVertex->haveConst_ = 1; - outputVertex->intConst_ = tempConst->getSExtValue(); - outputVertex->floatConst_ = 0x00; - outputVertex->doubleConst_ = 0x00; - -// inputVertex = new CDFGVertex; -// inputVertex->instruction_ = 0x00; -// inputVertex->haveConst_ = 1; -// inputVertex->intConst_ = tempConst->getSExtValue(); -// inputVertex->floatConst_ = 0x00; -// inputVertex->doubleConst_ = 0x00; - -// inputVertexID = g.addVertex(inputVertex); -// (*vertexList_)[&*blockIter].push_back(inputVertex); -// -// // Insert edge for const here since we can't discover it when we walk the graph -// ParserEdgeProperties* edgeProp = new ParserEdgeProperties; -// edgeProp->value_ = 0x00; -// g.addEdge(inputVertexID, outputVertexID, edgeProp); -// -// // create the node/use entries -// (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); -// -// //create the node/def entries -// (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - - } - else if( llvm::isa(tempOperand) ) // floats and doubles - { - llvm::ConstantFP* tempConst = llvm::cast(tempOperand); - - outputVertex->haveConst_ = 1; - outputVertex->intConst_ = 0x00; - if(tempOperand->getType()->isFloatTy()) { - outputVertex->doubleConst_ = (double) tempConst->getValueAPF().convertToFloat(); - } else { - outputVertex->doubleConst_ = tempConst->getValueAPF().convertToDouble(); - } - -// inputVertex = new CDFGVertex; -// inputVertex->instruction_ = 0x00; -// inputVertex->haveConst_ = 1; -// inputVertex->intConst_ = 0x00; -// // if(tempOperand->getType()->isFloatTy()) -// // inputVertex->floatConst = tempConst->getValueAPF().convertToFloat(); -// // else -// inputVertex->doubleConst_ = tempConst->getValueAPF().convertToDouble(); - -// inputVertexID = g.addVertex(inputVertex); -// (*vertexList_)[&*blockIter].push_back(inputVertex); -// -// // Insert edge for const here since we can't discover it when we walk the graph -// ParserEdgeProperties* edgeProp = new ParserEdgeProperties; -// edgeProp->value_ = 0x00; -// g.addEdge(inputVertexID, outputVertexID, edgeProp); -// -// // create the node/use entries -// (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); -// -// //create the node/def entries -// (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - } - - } else if( llvm::isa(tempOperand) ) { - std::map< llvm::Instruction*,CDFGVertex* >::iterator it = instructionMap_->find(llvm::cast(tempOperand)); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "**** " << static_cast(operandIter->get()) << " -- " << llvm::cast(tempOperand) << " -- "; - } - - if( it != instructionMap_->end() ) { - inputVertex = instructionMap_->at(llvm::cast(tempOperand)); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "+Found " << inputVertex->instruction_ << " in instructionMap_\n"; - } - - } else { - inputVertex = new CDFGVertex; - inputVertex->instruction_ = 0x00; - inputVertex->haveConst_ = 0; - inputVertex->intConst_ = 0x00; - inputVertex->floatConst_ = 0x00; - inputVertex->doubleConst_ = 0x00; - - inputVertexID = g.addVertex(inputVertex); - (*vertexList_)[&*blockIter].push_back(inputVertex); - - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - - //create the node/def entries - (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - } - - //add variable to node use list - tempUseVector->push_back(llvm::cast(tempOperand)); - } - - } - - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(outputVertex, tempUseVector) ); - - //create the node/def entries - tempDefVector->push_back(&*instructionIter); - (*defNode_)[&*blockIter]->insert( std::pair* >(outputVertex, tempDefVector) ); - - if( output_->getVerboseLevel() > 64 ) { - for( auto nodeUseEntry = (*useNode_)[&*blockIter]->at(outputVertex)->begin(); nodeUseEntry != (*useNode_)[&*blockIter]->at(outputVertex)->end(); nodeUseEntry++ ) { - llvm::errs() << "Node-Use Entry (" << outputVertex->instruction_ << "): " << *nodeUseEntry << "\n"; - } - - for( auto nodeDefEntry = (*defNode_)[&*blockIter]->at(outputVertex)->begin(); nodeDefEntry != (*defNode_)[&*blockIter]->at(outputVertex)->end(); nodeDefEntry++ ) { - llvm::errs() << "Node-Def Entry (" << outputVertex->instruction_ << "): " << *nodeDefEntry << "\n"; - } - } - - //END CAST - } else if( llvm::Instruction::isBinaryOp(tempOpcode) ) { // BEGIN binary operators & logical operators -- two operands - std::vector< llvm::Instruction* > *tempUseVector = new std::vector< llvm::Instruction* >; - std::vector< llvm::Instruction* > *tempDefVector = new std::vector< llvm::Instruction* >; - - for( auto operandIter = instructionIter->op_begin(), operandEnd = instructionIter->op_end(); operandIter != operandEnd; ++operandIter ) { - llvm::Value* tempOperand = operandIter->get(); - - if( llvm::isa(tempOperand) ) { - if( llvm::isa(tempOperand) ) { // signed/unsigned ints - llvm::ConstantInt* tempConst = llvm::cast(tempOperand); - - outputVertex->haveConst_ = 1; - outputVertex->intConst_ = tempConst->getSExtValue(); - outputVertex->floatConst_ = 0x00; - outputVertex->doubleConst_ = 0x00; - -// inputVertex = new CDFGVertex; -// inputVertex->instruction_ = 0x00; -// inputVertex->haveConst_ = 1; -// inputVertex->intConst_ = tempConst->getSExtValue(); -// inputVertex->floatConst_ = 0x00; -// inputVertex->doubleConst_ = 0x00; - -// inputVertexID = g.addVertex(inputVertex); -// (*vertexList_)[&*blockIter].push_back(inputVertex); -// -// // Insert edge for const here since we can't discover it when we walk the graph -// ParserEdgeProperties* edgeProp = new ParserEdgeProperties; -// edgeProp->value_ = 0x00; -// g.addEdge(inputVertexID, outputVertexID, edgeProp); -// -// // create the node/use entries -// (*useNode_)[&*blockIter]->insert( std::pair* >(outputVertex, new std::vector< llvm::Instruction* >) ); -// -// //create the node/def entries -// (*defNode_)[&*blockIter]->insert( std::pair* >(outputVertex, new std::vector< llvm::Instruction* >) ); - - } else if( llvm::isa(tempOperand) ){ // floats and doubles - llvm::ConstantFP* tempConst = llvm::cast(tempOperand); - - outputVertex->haveConst_ = 1; - outputVertex->intConst_ = 0x00; - if(tempOperand->getType()->isFloatTy()) { - outputVertex->doubleConst_ = (double) tempConst->getValueAPF().convertToFloat(); - } else { - outputVertex->doubleConst_ = tempConst->getValueAPF().convertToDouble(); - } - -// inputVertex = new CDFGVertex; -// inputVertex->instruction_ = 0x00; -// inputVertex->haveConst_ = 1; -// inputVertex->intConst_ = 0x00; -// // if(tempOperand->getType()->isFloatTy()) -// // inputVertex->floatConst = tempConst->getValueAPF().convertToFloat(); -// // else -// inputVertex->doubleConst_ = tempConst->getValueAPF().convertToDouble(); - -// inputVertexID = g.addVertex(inputVertex); -// (*vertexList_)[&*blockIter].push_back(inputVertex); -// -// // Insert edge for const here since we can't discover it when we walk the graph -// ParserEdgeProperties* edgeProp = new ParserEdgeProperties; -// edgeProp->value_ = 0x00; -// g.addEdge(inputVertexID, outputVertexID, edgeProp); -// -// // create the node/use entries -// (*useNode_)[&*blockIter]->insert( std::pair* >(outputVertex, new std::vector< llvm::Instruction* >) ); -// -// //create the node/def entries -// (*defNode_)[&*blockIter]->insert( std::pair* >(outputVertex, new std::vector< llvm::Instruction* >) ); - } - } else if( llvm::isa(tempOperand) ) { - std::map< llvm::Instruction*,CDFGVertex* >::iterator it = instructionMap_->find(llvm::cast(tempOperand)); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "**** " << static_cast(operandIter->get()) << " -- " << llvm::cast(tempOperand) << " -- "; - } - - if( it != instructionMap_->end() ) { - inputVertex = instructionMap_->at(llvm::cast(tempOperand)); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "+Found " << inputVertex->instruction_ << " in instructionMap_\n"; - } - } else { - inputVertex = new CDFGVertex; - inputVertex->instruction_ = 0x00; - inputVertex->haveConst_ = 0; - inputVertex->intConst_ = 0x00; - inputVertex->floatConst_ = 0x00; - inputVertex->doubleConst_ = 0x00; - - inputVertexID = g.addVertex(inputVertex); - (*vertexList_)[&*blockIter].push_back(inputVertex); - - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - - //create the node/def entries - (*defNode_)[&*blockIter]->insert( std::pair* >(inputVertex, new std::vector< llvm::Instruction* >) ); - } - - //add variable to node use list - tempUseVector->push_back(llvm::cast(tempOperand)); - } - - } - - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(outputVertex, tempUseVector) ); - - //create the node/def entries - tempDefVector->push_back(&*instructionIter); - (*defNode_)[&*blockIter]->insert( std::pair* >(outputVertex, tempDefVector) ); - - if( output_->getVerboseLevel() > 64 ) { - for( auto nodeUseEntry = (*useNode_)[&*blockIter]->at(outputVertex)->begin(); nodeUseEntry != (*useNode_)[&*blockIter]->at(outputVertex)->end(); nodeUseEntry++ ) { - llvm::errs() << "Node-Use Entry (" << outputVertex->instruction_ << "): " << *nodeUseEntry << "\n"; - } - - for( auto nodeDefEntry = (*defNode_)[&*blockIter]->at(outputVertex)->begin(); nodeDefEntry != (*defNode_)[&*blockIter]->at(outputVertex)->end(); nodeDefEntry++ ) { - llvm::errs() << "Node-Def Entry (" << outputVertex->instruction_ << "): " << *nodeDefEntry << "\n"; - } - } - - //END ALU - } else { - // create the node/use entries - (*useNode_)[&*blockIter]->insert( std::pair* >(outputVertex, new std::vector< llvm::Instruction* >) ); - - //create the node/def entries - (*defNode_)[&*blockIter]->insert( std::pair* >(outputVertex, new std::vector< llvm::Instruction* >) ); - - if( output_->getVerboseLevel() > 64 ) { - for( auto nodeUseEntry = (*useNode_)[&*blockIter]->at(outputVertex)->begin(); nodeUseEntry != (*useNode_)[&*blockIter]->at(outputVertex)->end(); nodeUseEntry++ ) { - llvm::errs() << "Node-Use Entry (" << outputVertex->instruction_ << "): " << *nodeUseEntry << "\n"; - } - - for( auto nodeDefEntry = (*defNode_)[&*blockIter]->at(outputVertex)->begin(); nodeDefEntry != (*defNode_)[&*blockIter]->at(outputVertex)->end(); nodeDefEntry++ ) { - llvm::errs() << "Node-Def Entry (" << outputVertex->instruction_ << "): " << *nodeDefEntry << "\n"; - } - } - } - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "********************************************* Ins Map *********************************************\n"; - for( std::map< llvm::Instruction*,CDFGVertex* >::iterator it = instructionMap_->begin(); it != instructionMap_->end(); ++it ) { - llvm::errs() << it->first; - llvm::errs() << " "; - } - llvm::errs() << "\n****************************************************************************************************\n"; - - llvm::errs() << "\t\t\tnum operands " << instructionIter->getNumOperands() << "\n"; - for( auto operandIter = instructionIter->op_begin(), operandEnd = instructionIter->op_end(); operandIter != operandEnd; ++operandIter ) { - llvm::errs() << "\t\t\top get " << operandIter->get() << "\n"; - llvm::errs() << "\t\t\top uses " << operandIter->get()->getNumUses() << "\n"; - llvm::errs() << "\t\t\top dump "; - operandIter->get()->dump(); - - if( operandIter->get()->hasName() == 1 ) { - llvm::errs() << "\t\t\tfound "; - llvm::errs().write_escaped(operandIter->get()->getName().str()) << " "; - } else { - llvm::errs() << "\t\t\tempty \n"; - } - llvm::errs() << "\n"; - } - - llvm::errs() << "\n"; - } - } - } - - // should be complete here - output_->verbose(CALL_INFO, 1, 0, "...Flow Graph Done.\n"); - -}//END expandBBGraph - -void Parser::assembleGraph(void) -{ - // Need to assemble the actual graph -- insert edges for def-use chains - // This is done for each vertex in the BB graph and then merged - BBGraph &bbg = *bbGraph_; - - auto vertexMap = bbg.getVertexMap(); - for(auto bbGraphIter = vertexMap->begin(); bbGraphIter != vertexMap->end(); ++bbGraphIter) { -// std::cout << bbGraphIter->first << "[label=\""; -// std::cout << bbGraphIter->second.getValue(); -// std::cout << "\"];\n"; - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "\nConstructing graph for basic block " << bbGraphIter->second.getValue() << "...\n"; - } - - llvm::BasicBlock* basicBlock = bbGraphIter->second.getValue(); - CDFG &g = *((*flowGraph_)[basicBlock]); - - if( output_->getVerboseLevel() > 64 ) { - // Prints list of all instructions in the basic block before processing - for( auto revIt = (*vertexList_)[basicBlock].rbegin(); revIt != (*vertexList_)[basicBlock].rend(); ++revIt) { - llvm::errs() << "\t" << (*revIt)->instruction_ << "\n"; - } - llvm::errs() << "\n\n"; - } - - for( auto revIt = (*vertexList_)[basicBlock].rbegin(); revIt != (*vertexList_)[basicBlock].rend(); ++revIt) { - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "\n\tInstruction: " << (*revIt)->instruction_ << "\n"; - } - - // For each vertex, iterate through the instruction's use list - // Insert an edge between the uses and the next def - // Defs may be origin node, previous store, etc - for( auto nodeUseEntry = (*useNode_)[basicBlock]->at(*revIt)->begin(); nodeUseEntry != (*useNode_)[basicBlock]->at(*revIt)->end(); ++nodeUseEntry ) { - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "\t\tNode Use Entry: " << *nodeUseEntry << "\n"; - } - - // Check for actual instructions used in this vertex - for( auto innerRevIt = revIt; innerRevIt != (*vertexList_)[basicBlock].rend(); ++innerRevIt ) { - llvm::Instruction* innerInst = (*innerRevIt)->instruction_; - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "\t\t\tinner: " << innerInst << "\n"; - } - - if( innerRevIt == revIt || innerInst == 0x00 ) { - continue; - } - - // FIXME Assume Allocate -> Store -> Load chain and skip edge between allocate and load - if( (*revIt)->instruction_->getOpcode() == llvm::Instruction::Load && innerInst->getOpcode() == llvm::Instruction::Alloca ) { - continue; - } - - if( innerInst == *nodeUseEntry ) { - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "\t\t\t\tfound: " << innerInst << "\n"; - } - - ParserEdgeProperties* edgeProp = new ParserEdgeProperties; - edgeProp->value_ = 0x00; - g.addEdge(g[*innerRevIt], g[*revIt], edgeProp); - } - - // Check for RAW and WAW deps - uint32_t opccode = (*revIt)->instruction_->getOpcode(); - if( opccode == llvm::Instruction::Load || opccode == llvm::Instruction::Store ) { - for( auto operandIter = innerInst->op_begin(), operandEnd = innerInst->op_end(); operandIter != operandEnd; ++operandIter ) { - if( innerInst->getOpcode() == llvm::Instruction::Store ) { - if( *operandIter == *nodeUseEntry ) { - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "\t\t\t\t(" << (*innerRevIt)->instruction_ << ") operand " << *operandIter << "\n"; - } - - ParserEdgeProperties* edgeProp = new ParserEdgeProperties; - edgeProp->value_ = 0x00; - g.addEdge(g[*innerRevIt], g[*revIt], edgeProp); - } - } - } - } - } - } - }//END dep chain - - // Finally, check for orphaned nodes - // Want to connect them with any previous node that is non-zero and has zero out-edges - output_->verbose(CALL_INFO, 1, 0, "\nChecking for orphans...\n"); - - auto cdfgVertexMap = g.getVertexMap(); - for(auto cdfgGraphIter = cdfgVertexMap->begin(); cdfgGraphIter != cdfgVertexMap->end(); ++cdfgGraphIter) { - - llvm::Instruction* tempIns = cdfgGraphIter->second.getValue()->instruction_; - if( tempIns != 0x00 ) { - if( tempIns->getOpcode() != llvm::Instruction::Alloca ) { - if( cdfgGraphIter->second.getInDegree() <= 0 && cdfgGraphIter->second.getOutDegree() <= 0 ) { - for( auto revIt = (*vertexList_)[basicBlock].rbegin(); revIt != (*vertexList_)[basicBlock].rend(); ++revIt) { - if( (*revIt)->instruction_ != 0x00 && g.getVertex(g[*revIt])->getOutDegree() <= 0 && (*revIt)->instruction_ != tempIns) { - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "@@@@@@@@@@@@@@@@@@@@@@@@@ " << (*revIt)->instruction_; - llvm::errs() << " to " << tempIns << "\n"; - } - - ParserEdgeProperties* edgeProp = new ParserEdgeProperties; - edgeProp->value_ = 0x00; - g.addEdge(g[*revIt], cdfgGraphIter->first, edgeProp); - } - } - } - } - } - }//END orphan check - } -}//END assembleGraph - -void Parser::mergeGraphs() -{ - output_->verbose(CALL_INFO, 1, 0, "\nMerging graphs\n"); - - functionGraph_ = new CDFG; - BBGraph &bbg = *bbGraph_; - auto bbVertexMap = bbg.getVertexMap(); - for(auto bbGraphIter = bbVertexMap->begin(); bbGraphIter != bbVertexMap->end(); ++bbGraphIter) { - llvm::BasicBlock* basicBlock = bbGraphIter->second.getValue(); - CDFG &g = *((*flowGraph_)[basicBlock]); - CDFG::copyGraph(g, *functionGraph_); - } - - functionGraph_->printDot("00_func.dot"); - - // Connect the individual basic blocks back together - output_->verbose(CALL_INFO, 1, 0, "...adding edges...\n"); - - auto funcVertexMap = functionGraph_->getVertexMap(); - for( auto vertexIterator = funcVertexMap->begin(); vertexIterator != funcVertexMap ->end(); ++vertexIterator ) { - llvm::Instruction* tempIns = vertexIterator->second.getValue()->instruction_; - - if( tempIns != 0x00 ) { - if( tempIns->getOpcode() == llvm::Instruction::Br ) { - llvm::BasicBlock* currentBB = tempIns->getParent(); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "Ins " << &*tempIns << " located in " << &*currentBB << "\n"; - } - - // Identify all successor basic bloacks and identify all entry instructions for each one - // An entry instruction is a non-zero instruction with no in-edges - llvm::BasicBlock* nextBB; - std::vector < llvm::Instruction* > connectorList; - uint32_t totalSuccessors = currentBB->getTerminator()->getNumSuccessors(); - for( uint32_t successor = 0; successor < totalSuccessors; successor++ ) { - nextBB = currentBB->getTerminator()->getSuccessor(successor); - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "-----Next BB: " << nextBB << "\n"; - } - - CDFG &g = *((*flowGraph_)[nextBB]); - auto vertexMap = g.getVertexMap(); - for( auto vertexIteratorInner = vertexMap->begin(); vertexIteratorInner != vertexMap ->end(); ++vertexIteratorInner ) { - bool found = 0; - llvm::Instruction* targetIns = vertexIteratorInner->second.getValue()->instruction_; - std::vector< CDFGVertex* >::reverse_iterator targetIter = std::find((*vertexList_)[nextBB].rbegin(), (*vertexList_)[nextBB].rend(), vertexIteratorInner->second.getValue()); - - if( targetIns != 0x00 ) { - // If there's only a single instruction then we can link directly - // otherwise we need to check for backward deps - if( (*vertexList_)[nextBB].size() == 1 ) { - connectorList.push_back(targetIns); - } else { - for( auto operandIter = targetIns->op_begin(), operandEnd = targetIns->op_end(); operandIter != operandEnd; operandIter++ ) { - if( llvm::isa(*operandIter) ) { - for( std::vector< CDFGVertex* >::reverse_iterator revIt = targetIter + 1; revIt != (*vertexList_)[nextBB].rend(); ++revIt) { - if( (*revIt)->instruction_ == *operandIter ) { - found = 1; - break; - } - } - } - } - - // If the operands do not depend on a previous ins, then we can safely link - if( found == 0 && targetIns->getOpcode() > 10 ) { - connectorList.push_back(targetIns); - } - } - } - } - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "Dumping connector list for " << &*currentBB << ": "; - for( auto connectorIter = connectorList.begin(); connectorIter != connectorList.end(); connectorIter++ ) { - llvm::errs() << *connectorIter << ", "; - } - llvm::errs() << "\n"; - } - } - - for( auto connectorIter = connectorList.begin(); connectorIter != connectorList.end(); connectorIter++ ) { - for( auto vertexIteratorInner = funcVertexMap->begin(); vertexIteratorInner != funcVertexMap ->end(); ++vertexIteratorInner ) { - if( *connectorIter == vertexIteratorInner->second.getValue()->instruction_ ) { - - if( output_->getVerboseLevel() > 64 ) { - llvm::errs() << "\tConnecting (" << &*currentBB; - llvm::errs() << ") " << vertexIterator->second.getValue()->instruction_ << " in " << vertexIterator->second.getValue()->instruction_->getParent(); - llvm::errs() << " to (" << &*nextBB; - llvm::errs() << ") " << vertexIteratorInner->second.getValue()->instruction_ << " in " << vertexIteratorInner->second.getValue()->instruction_->getParent() << "\n"; - } - - ParserEdgeProperties* edgeProp = new ParserEdgeProperties; - edgeProp->value_ = 0x00; - functionGraph_->addEdge(vertexIterator->first, vertexIteratorInner->first, edgeProp ); - } - } - } - } - } - } - - output_->verbose(CALL_INFO, 1, 0, "...merge finished\n"); - functionGraph_->printDot("00_func-m.dot"); - -}//END mergeGraphs - -void Parser::printCDFG( const std::string fileName ) const -{ - //open a file for writing (truncate the current contents) - std::ofstream outputFile(fileName.c_str(), std::ios::trunc); - - //check to be sure file is open - if ( !outputFile ) { - output_->fatal(CALL_INFO, -1, "Error: Cannot open file %s\n", fileName.c_str()); - exit(0); - } - - outputFile << "digraph G {" << "\n"; - - auto funcVertexMap = functionGraph_->getVertexMap(); - for( auto vertexIterator = funcVertexMap->begin(); vertexIterator != funcVertexMap ->end(); ++vertexIterator ) { - - outputFile << vertexIterator->first << "[label=\""; - outputFile << vertexIterator->second.getValue()->instructionName_; - outputFile << "\"];\n"; - } - - for(auto vertexIterator = funcVertexMap->begin(); vertexIterator != funcVertexMap->end(); ++vertexIterator) { - std::vector< Edge* >* adjacencyList = vertexIterator->second.getAdjacencyList(); - - for( auto it = adjacencyList->begin(); it != adjacencyList->end(); ++it ) { - outputFile << vertexIterator->first; - outputFile << "->"; - outputFile << (*it)->getDestination(); - outputFile << "\n"; - } - } - - outputFile << "}"; - outputFile.close(); - - -}//END printCDFG - -void Parser::printVertex ( const CDFGVertex* vertexIn ) const -{ - std::cerr << vertexIn->instruction_ << std::endl; - std::cerr << "\t" << vertexIn->instructionName_ << std::endl; - std::cerr << "\t" << vertexIn->valueName_ << std::endl; - std::cerr << "\t\t" << vertexIn->haveConst_ << std::endl; - std::cerr << "\t\t" << vertexIn->intConst_ << std::endl; - std::cerr << "\t\t" << vertexIn->floatConst_ << std::endl; - std::cerr << "\t\t" << vertexIn->doubleConst_ << std::endl; -// std::cerr << "\t" << vertexIn->leftArg_ << std::endl; -// std::cerr << "\t" << vertexIn->rightArg_ << std::endl; -} - -void Parser::printPyMapper( const std::string fileName ) const -{ - std::ofstream outputFile(fileName.c_str(), std::ios::trunc); //open a file for writing (truncate the current contents) - if ( !outputFile ) //check to be sure file is open - std::cerr << "Error opening file."; - - outputFile << "// model intput" << "\n"; - outputFile << "strict digraph {" << "\n"; - - //need this for type size but there should be a better way - llvm::DataLayout* dataLayout = new llvm::DataLayout(mod_); - - auto funcVertexMap = functionGraph_->getVertexMap(); - for( auto vertexIterator = funcVertexMap->begin(); vertexIterator != funcVertexMap ->end(); ++vertexIterator ) { - - llvm::Instruction* tempInstruction = vertexIterator->second.getValue()->instruction_; - if( tempInstruction != NULL ) { - std::cout << "vertex: " << vertexIterator->first << "\n"; - - //temp const vector - std::map< uint32_t, std::string > constVector; - //write node ID - outputFile << vertexIterator->first << " ["; - outputFile << std::flush; - - //if this is a call, we want to swipper-swap the op with the operand - const char* pos = strstr(tempInstruction->getOpcodeName(), "call"); - std::string newOpcode; - - //write operands - bool first = 0; - outputFile << "input=" << "\""; - for( auto operandIter = tempInstruction->op_begin(), operandEnd = tempInstruction->op_end(); operandIter != operandEnd; ++operandIter ) { - if( first != 0 ) { - outputFile << ":"; - } else { - first = 1; - } - - std::cout << operandIter->get()->getNameOrAsOperand() << " -- boopA" << std::endl; - - if( llvm::isa(operandIter) ) { - std::cout << operandIter->getOperandNo() << "\":"; - std::cout << vertexIterator->second.getValue()->intConst_ << " "; - std::cout << vertexIterator->second.getValue()->floatConst_ << " "; - std::cout << vertexIterator->second.getValue()->doubleConst_ << " "; - std::cout << std::endl; - - if( llvm::isa(operandIter) ) { - llvm::ConstantInt* tempConst = llvm::cast(operandIter); - - constVector.emplace( operandIter->getOperandNo(), tempConst->getNameOrAsOperand() ); - std::cout << tempConst->getNameOrAsOperand() << " -- inboop" << std::endl; - } else if( llvm::isa(operandIter) ) { - llvm::ConstantFP* tempConst = llvm::cast(operandIter); - - //this is super hacky ^-^ - constVector.emplace( operandIter->getOperandNo(), std::to_string(std::stod(tempConst->getNameOrAsOperand())) ); - std::cout << std::to_string(std::stod(tempConst->getNameOrAsOperand())) << " -- fpboop" << std::endl; - } else if( llvm::isa(operandIter) ) { - outputFile << operandIter->get()->getNameOrAsOperand(); - std::cout << operandIter->get()->getNameOrAsOperand() << " -- boopB" << std::endl; - } else if( llvm::isa(operandIter) ) { - if( tempInstruction->getOpcodeName() == pos ) { - outputFile << ""; - newOpcode = operandIter->get()->getNameOrAsOperand(); - } else { - outputFile << operandIter->get()->getNameOrAsOperand(); - std::cout << operandIter->get()->getNameOrAsOperand() << " -- boopG" << std::endl; - } - } else { - output_->fatal(CALL_INFO, -1, "Error: No valid operand\n"); - exit(0); - } - } else { - outputFile << operandIter->get()->getNameOrAsOperand(); - std::cout << operandIter->get()->getNameOrAsOperand() << " -- boopC" << std::endl; - } - - }//end for - outputFile << "\"" << ", "; - - //write constants - outputFile << "consts=" << "\""; - for( auto it = constVector.begin(); it != constVector.end(); ) { - outputFile << it->second << ":" << it->first; - ++it; - if( it != constVector.end() ) { - outputFile << " "; - } - } - outputFile << "\"" << ", "; - - //write outputs - llvm::Value* returnval = llvm::cast(tempInstruction); - outputFile << "output=" << "\""; - if( returnval->hasName() == 1 ) { - outputFile << returnval->getName().str(); - } - outputFile << "\"" << ", "; - - //write op - outputFile << "op=" << "\""; - if( tempInstruction->getOpcodeName() == pos ) { - outputFile << newOpcode; - } else { - outputFile << tempInstruction->getOpcodeName(); - } - outputFile << "\"" << ", "; - - //write type - outputFile << "type=" << "\""; - if( tempInstruction->getType()->isSized() ) { - outputFile << dataLayout->getTypeStoreSize(tempInstruction->getType()); - } - outputFile << "\"" << "];"; - - //finish - outputFile << "\n"; - } - } - - outputFile << std::endl; - for(auto vertexIterator = funcVertexMap->begin(); vertexIterator != funcVertexMap->end(); ++vertexIterator) { - std::vector< Edge* >* adjacencyList = vertexIterator->second.getAdjacencyList(); - - for( auto it = adjacencyList->begin(); it != adjacencyList->end(); ++it ) { - outputFile << vertexIterator->first; - outputFile << "->"; - outputFile << (*it)->getDestination(); - outputFile << "\n"; - } - } - - outputFile << "}"; - outputFile.close(); - -}//END printPyMapper - -void Parser::collapseInductionVars() -{ - std::cout << "\n\n ---Collapse Testing---\n" << std::flush; - - auto funcVertexMap = functionGraph_->getVertexMap(); - for( auto vertexIterator = funcVertexMap->begin(); vertexIterator != funcVertexMap ->end(); ++vertexIterator ) { - - llvm::Instruction* tempInstruction = vertexIterator->second.getValue()->instruction_; - if( tempInstruction != NULL ) { - std::cout << "vertex: " << vertexIterator->first << "\n"; - - //write operands - for( auto operandIter = tempInstruction->op_begin(), operandEnd = tempInstruction->op_end(); operandIter != operandEnd; ++operandIter ) { - std::cout << operandIter->get()->getNameOrAsOperand() << " -- boopA" << std::endl; - - if( llvm::isa(operandIter) ) { - std::cout << operandIter->getOperandNo() << ": "; - std::cout << vertexIterator->second.getValue()->intConst_ << " "; - std::cout << vertexIterator->second.getValue()->floatConst_ << " "; - std::cout << vertexIterator->second.getValue()->doubleConst_ << " "; - std::cout << std::endl; - - if( llvm::isa(operandIter) ) { - llvm::ConstantInt* tempConst = llvm::cast(operandIter); - - std::cout << tempConst->getNameOrAsOperand() << " -- inboop" << std::endl; - - } else if( llvm::isa(operandIter) ) { - llvm::ConstantFP* tempConst = llvm::cast(operandIter); - - std::cout << tempConst->getNameOrAsOperand() << " -- fpboop" << std::endl; - - } else if( llvm::isa(operandIter) ) { - - std::cout << operandIter->get()->getNameOrAsOperand() << " -- boopB" << std::endl; - - } else if( llvm::isa(operandIter) ) { - - std::cout << operandIter->get()->getNameOrAsOperand() << " -- boopG" << std::endl; - } else { - output_->fatal(CALL_INFO, -1, "Error: No valid operand\n"); - exit(0); - } - } else { - - std::cout << operandIter->get()->getNameOrAsOperand() << " -- boopC" << std::endl; - } - - }//end for - } - } - -}//END collapseInductionVars - -} // namespace llyr -} // namespace SST - - - diff --git a/src/sst/elements/llyr/parser/parser.h b/src/sst/elements/llyr/parser/parser.h deleted file mode 100644 index f1aa22d594..0000000000 --- a/src/sst/elements/llyr/parser/parser.h +++ /dev/null @@ -1,113 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _PARSER_H -#define _PARSER_H - -#define DEBUG - -#include - -#include -#include -#include -#include -#include -#include - -#include "graph/graph.h" -#include "llyrTypes.h" -#include "pes/peList.h" - -namespace SST { -namespace Llyr { - -struct alignas(int64_t) ParserEdgeProperties : EdgeProperties -{ - llvm::Value* value_; - int64_t const_; -}; - -struct alignas(int64_t) CDFGVertex -{ - llvm::Instruction* instruction_; - std::string valueName_; - - bool haveConst_; - int64_t intConst_; - float floatConst_; - double doubleConst_; - - std::string instructionName_; - std::string sizeType_; -}; - -typedef LlyrGraph< CDFGVertex* > CDFG; -typedef LlyrGraph< llvm::BasicBlock* > BBGraph; - -class Parser -{ -public: - Parser(const std::string& offloadString, SST::Output* output) : - output_(output), offloadString_(offloadString) - { - vertexList_ = new std::map< llvm::BasicBlock*, std::vector< CDFGVertex* > >; - - defNode_ = new std::map< llvm::BasicBlock*, std::map< CDFGVertex*, std::vector< llvm::Instruction* >* >* >; - useNode_ = new std::map< llvm::BasicBlock*, std::map< CDFGVertex*, std::vector< llvm::Instruction* >* >* >; - - flowGraph_ = new std::map< llvm::BasicBlock*, CDFG* >; - bbGraph_ = new BBGraph; - } - - ~Parser() {}; - - void generateAppGraph( std::string functionName ); - -protected: - -private: - SST::Output* output_; - std::string offloadString_; - std::string offloadTarget_; - - BBGraph* bbGraph_; - CDFG* functionGraph_; - - llvm::Module* mod_; - - std::map< llvm::BasicBlock*, CDFG* >* flowGraph_; - std::map< llvm::BasicBlock*, std::vector< CDFGVertex* > >* vertexList_; - - std::map< llvm::BasicBlock*, std::map< CDFGVertex*, std::vector< llvm::Instruction* >* >* >* defNode_; - std::map< llvm::BasicBlock*, std::map< CDFGVertex*, std::vector< llvm::Instruction* >* >* >* useNode_; - - void generatebBasicBlockGraph(llvm::Function* func); - void expandBBGraph(llvm::Function* func); - void assembleGraph(); - void mergeGraphs(); - - void collapseInductionVars(); - - void printVertex ( const CDFGVertex* ) const; - void printCDFG( const std::string fileName ) const; - void printPyMapper( const std::string fileName ) const; - -}; - -} // namespace LLyr -} // namespace SST - -#endif /* _PARSER_H */ diff --git a/src/sst/elements/llyr/pes/complexPE.h b/src/sst/elements/llyr/pes/complexPE.h deleted file mode 100644 index 2529b6451d..0000000000 --- a/src/sst/elements/llyr/pes/complexPE.h +++ /dev/null @@ -1,162 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _COMPLEX_PE_H -#define _COMPLEX_PE_H - -#include - -#include "pes/processingElement.h" - -namespace SST { -namespace Llyr { - -/** - * @todo write docs - */ -class ComplexProcessingElement : public ProcessingElement -{ -public: - ComplexProcessingElement(opType op_binding, uint32_t processor_id, LlyrConfig* llyr_config) : - ProcessingElement(op_binding, processor_id, llyr_config) - { - latency_ = llyr_config->complex_latency_; - cycles_to_fire_ = latency_; - } - - virtual bool doReceive(LlyrData data) { return 0; }; - - virtual bool doCompute() - { - output_->verbose(CALL_INFO, 4, 0, ">> Compute %s\n", getOpString(op_binding_).c_str()); - - uint64_t intResult = 0x0F; - - std::vector< LlyrData > argList; - LlyrData retVal; - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (0)\n"); - printInputQueue(); - printOutputQueue(); - } - - uint32_t num_ready = 0; - uint32_t num_inputs = 0; - uint32_t total_num_inputs = input_queues_->size(); - - // discover which of the input queues are used for the compute - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - num_inputs = num_inputs + 1; - } - } - - // FIXME check to see of there are any routing jobs -- should be able to do this without waiting to fire - bool routed = doRouting( total_num_inputs ); - - //check to see if all of the input queues have data - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - num_ready = num_ready + 1; - } - } - } - - //if there are values waiting on any of the inputs, this PE could still fire - if( num_ready < num_inputs && num_ready > 0) { - pending_op_ = 1; - } else { - pending_op_ = 0 | routed; - } - - // make sure all of the output queues have room for new data - for( uint32_t i = 0; i < output_queues_->size(); ++i) { - // std::cout << " Queue " << i << " Size " << output_queues_->at(i)->data_queue_->size() << " Max " << queue_depth_ << std::endl; - if( output_queues_->at(i)->data_queue_->size() >= queue_depth_ && *output_queues_->at(i)->routing_arg_ == "" ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 " -- No room in output queue %" PRIu32 ", cannot fire\n", num_inputs, num_ready, i); - return false; - } - } - - //if all inputs are available pull from queue and add to arg list - if( num_inputs == 0 || num_ready < num_inputs ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 " Fire %" PRIu16 "\n", num_inputs, num_ready, cycles_to_fire_); - return false; - } else if( cycles_to_fire_ > 0 ) { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 " Fire %" PRIu16 "\n", num_inputs, num_ready, cycles_to_fire_); - cycles_to_fire_ = cycles_to_fire_ - 1; - pending_op_ = 1; - return false; - } else { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 " Fire %" PRIu16 "\n", num_inputs, num_ready, cycles_to_fire_); - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - argList.push_back(input_queues_->at(i)->data_queue_->front()); - input_queues_->at(i)->forwarded_ = 0; - input_queues_->at(i)->data_queue_->pop(); - } - } - cycles_to_fire_ = latency_; - } - - // If data tokens in output queue then simulation cannot end - pending_op_ = 1; - - switch( op_binding_ ) { - case TSIN : - intResult = sin(argList[0].to_ullong()); - break; - case TCOS : - intResult = cos(argList[0].to_ullong()); - break; - case TTAN : - intResult = tan(argList[0].to_ullong()); - break; - default : - output_->verbose(CALL_INFO, 0, 0, "Error: could not find corresponding op-%" PRIu32 ".\n", op_binding_); - exit(-1); - } - - retVal = LlyrData(intResult); - - output_->verbose(CALL_INFO, 32, 0, "intResult = %" PRIu64 "\n", intResult); - output_->verbose(CALL_INFO, 32, 0, "retVal = %s\n", retVal.to_string().c_str()); - - //for now push the result to all output queues - for( uint32_t i = 0; i < output_queues_->size(); ++i) { - output_queues_->at(i)->data_queue_->push(retVal); - } - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (1)\n"); - printInputQueue(); - printOutputQueue(); - } - - return true; - } - - //TODO for testing only - virtual void inputQueueInit() {}; - virtual void outputQueueInit() {}; - -}; - -}//SST -}//Llyr - -#endif // _LOGIC_PE_H diff --git a/src/sst/elements/llyr/pes/controlPE.h b/src/sst/elements/llyr/pes/controlPE.h deleted file mode 100644 index 0f6c1e2dc8..0000000000 --- a/src/sst/elements/llyr/pes/controlPE.h +++ /dev/null @@ -1,618 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _CONTROL_PE_H -#define _CONTROL_PE_H - -#include "pes/processingElement.h" - -namespace SST { -namespace Llyr { - -// valid, queue id, data -typedef std::tuple< bool, int32_t, LlyrData > HelperReturn; - -/** - * @todo write docs - */ -class ControlProcessingElement : public ProcessingElement -{ -public: - ControlProcessingElement(opType op_binding, uint32_t processor_id, LlyrConfig* llyr_config) : - ProcessingElement(op_binding, processor_id, llyr_config) - { - do_forward_ = 0; - timeout_ = 5; - } - - virtual bool doReceive(LlyrData data) { return 0; }; - - virtual bool doCompute() - { - // TraceFunction trace(CALL_INFO_LONG); - output_->verbose(CALL_INFO, 4, 0, ">> Compute %s\n", getOpString(op_binding_).c_str()); - - uint64_t intResult = 0x0F; - - std::vector< QueueData > argList(3); - std::vector< bool > forwarded(input_queues_->size(), 0); - LlyrData retVal; - uint32_t queue_id; - bool valid_return; - - if( output_->getVerboseLevel() >= 10 ) { - printInputQueue(); - printOutputQueue(); - } - - uint32_t num_ready = 0; - uint32_t num_inputs = 0; - uint32_t total_num_inputs = input_queues_->size(); - - // discover which of the input queues are used for the compute - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - num_inputs = num_inputs + 1; - } - } - - // FIXME check to see of there are any routing jobs -- should be able to do this without waiting to fire - bool routed = doRouting( total_num_inputs ); - - //check to see if all of the input queues have data - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - num_ready = num_ready + 1; - } - } - } - - //FIXME do control PEs need to know if they're waiting on data? - //if there are values waiting on any of the inputs, this PE could still fire - if( num_ready < num_inputs && num_ready > 0) { - pending_op_ = 1; - } else { - pending_op_ = 0 | routed; - } - - // if all inputs are available pull from queue and add to arg list - // exception is MERGE, which will forward the first data token to arrive - // exception is REPEATER, which is the bane of my existance - if( op_binding_ == MERGE && num_ready > 0 ) { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 "\n", num_inputs, num_ready); - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - argList[i].valid_ = 1; - argList[i].data_ = input_queues_->at(i)->data_queue_->front(); - } else { - argList[i].valid_ = 0; - } - } - } - } else if( op_binding_ == REPEATER ) { - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - argList[i].valid_ = 1; - argList[i].data_ = input_queues_->at(i)->data_queue_->front(); - - forwarded[i] = input_queues_->at(i)->forwarded_; - input_queues_->at(i)->forwarded_ = 0; - input_queues_->at(i)->data_queue_->pop(); - - } else { - argList[i].valid_ = 0; - } - } - } - - if( argList[0].valid_ != 1 ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 "\n", num_inputs, num_ready); - for( uint32_t i = 0; i < argList.size(); ++i ) { - if( argList[i].valid_ == 1 ) { - input_queues_->at(i)->forwarded_ = forwarded[i]; - input_queues_->at(i)->data_queue_->push(argList[i].data_); - } - } - return false; - } else { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 "\n", num_inputs, num_ready); - } - - } else if( num_inputs == 0 || num_ready < num_inputs ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 "\n", num_inputs, num_ready); - return false; - } else { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 "\n", num_inputs, num_ready); - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - argList[i].valid_ = 1; - argList[i].data_ = input_queues_->at(i)->data_queue_->front(); - - forwarded[i] = input_queues_->at(i)->forwarded_; - input_queues_->at(i)->forwarded_ = 0; - input_queues_->at(i)->data_queue_->pop(); - } else { - argList[i].valid_ = 0; - } - } - } - } - - // If data tokens in output queue then simulation cannot end - pending_op_ = 1; - - // variables set in helper function - queue_id = 0; - valid_return = 0; - HelperReturn tempReturn; - - switch( op_binding_ ) { - case SEL : - case ROZ : - case ROO : - case ONEONAND : - case GATED_ONE : - case MERGE : - case REPEATER : - tempReturn = helperFunction(op_binding_, argList[0], argList[1], argList[2]); - retVal = std::get<2>(tempReturn); - queue_id = std::get<1>(tempReturn); - valid_return = std::get<0>(tempReturn); - break; - case ROUTE : - retVal = LlyrData(0x00); - break; - case RET : - retVal = LlyrData(0x00); - break; - default : - output_->verbose(CALL_INFO, 0, 0, "Error: could not find corresponding op-%" PRIu32 ".\n", op_binding_); - exit(-1); - } - - output_->verbose(CALL_INFO, 32, 0, "intResult = %" PRIu64 "\n", intResult); - output_->verbose(CALL_INFO, 32, 0, "retVal = %s\n", retVal.to_string().c_str()); - - // for now push the result to all output queues that need this result - if( op_binding_ == MERGE ) { - input_queues_->at(queue_id)->forwarded_ = forwarded[queue_id]; - input_queues_->at(queue_id)->data_queue_->pop(); - - for( uint32_t i = 0; i < output_queues_->size(); ++i ) { - if( *output_queues_->at(i)->routing_arg_ == "" ) { - output_queues_->at(i)->data_queue_->push(retVal); - } - } - } else if( op_binding_ == REPEATER ) { - if( valid_return == 1 ) { - for( uint32_t i = 0; i < output_queues_->size(); ++i ) { - if( *output_queues_->at(i)->routing_arg_ == "" ) { - output_queues_->at(i)->data_queue_->push(retVal); - } - } - - // need to keep the arg-1 value if it wasn't reset, using queueId of 2 for this - if( argList[0].valid_ == 1 && argList[0].data_ == 0 && queue_id == 2 ) { - if( argList[1].valid_ == 1 ) { - input_queues_->at(1)->forwarded_ = forwarded[1]; - input_queues_->at(1)->data_queue_->push(argList[1].data_); - } - } - } else if( queue_id == 2 ) { - for( uint32_t i = 0; i < argList.size(); ++i ) { - if( argList[i].valid_ == 1 ) { - input_queues_->at(i)->forwarded_ = forwarded[1]; - input_queues_->at(i)->data_queue_->push(argList[i].data_); - } - } - } - } else if( op_binding_ == GATED_ONE ) { - // need to keep the arg-0 value if the data stream didn't gnom it up - if( argList[1].valid_ == 1 && argList[1].data_ == 0 ) { - if( argList[0].valid_ == 1 ) { - input_queues_->at(0)->forwarded_ = forwarded[0]; - input_queues_->at(0)->data_queue_->push(argList[0].data_); - } - } - - if( valid_return == 1 ) { - for( uint32_t i = 0; i < output_queues_->size(); ++i ) { - if( *output_queues_->at(i)->routing_arg_ == "" ) { - output_queues_->at(i)->data_queue_->push(retVal); - } - } - } - } else { - if( valid_return == 1 ) { - for( uint32_t i = 0; i < output_queues_->size(); ++i ) { - if( *output_queues_->at(i)->routing_arg_ == "" ) { - output_queues_->at(i)->data_queue_->push(retVal); - } - } - } - } - - if( output_->getVerboseLevel() >= 10 ) { - printInputQueue(); - printOutputQueue(); - } - - return true; - } - - //TODO for testing only - virtual void inputQueueInit() {}; - virtual void outputQueueInit() {}; - -protected: - uint16_t do_forward_; - - HelperReturn helperFunction( opType op, QueueData arg0, QueueData arg1, QueueData arg2 ) - { - - std::cout << "ARG[0]:" << arg0.valid_ << "::" << arg0.data_ << "-> " << arg0.data_.to_ullong() << std::endl; - std::cout << "ARG[1]:" << arg1.valid_ << "::" << arg1.data_ << "-> " << arg1.data_.to_ullong() << std::endl; - std::cout << "ARG[2]:" << arg2.valid_ << "::" << arg2.data_ << "-> " << arg2.data_.to_ullong() << std::endl; - - // SEL - Select: If control (arg2) is 0, send arg0, else send arg1 - // ROS - Route on Signal: Push stored token out when input-1 is high - // ROZ - Route on 0: Forward data if control (arg0) == 0 - // ROO - Route on 1: Forward data if control (arg0) == 1 - // ONEONAND - Test & Set: Emit a '1' if both inputs are 1, else drop the inputs and emit nothing TODO move to logic - // MERGE - Choose One: Forward first token to arrive - // REPEATER - Repeater: If ctrl = 0, fwd buffer; if buffer empty, fill buffer and fwd; if ctrl = 1, fill buffer - // FILTER - Filter: Filter based on value (e.g. if filter 0s, forward all but 0s) - if( op == SEL ) { - if( arg2.valid_ == 1 ) { - switch( arg2.data_.to_ullong() ) { - case 0 : - return std::make_tuple(1, 0, arg0.data_); - break; - case 1 : - return std::make_tuple(1, 1, arg1.data_);; - break; - default : - output_->verbose( CALL_INFO, 0, 0, "Error: invalid select signal.\n" ); - exit(-1); - } - } else { - output_->verbose( CALL_INFO, 0, 0, "Error: invalid select signal.\n" ); - exit(-1); - } - } else if( op == ROS ) { - if( arg1.valid_ == 1 ) { - do_forward_ = 1; - return std::make_tuple(1, 0, arg0.data_); - } else { - do_forward_ = 0; - return std::make_tuple(0, 0, LlyrData(0x00)); - } - } else if( op == RNE ) { - if( arg1.valid_ == 1 ) { - if( arg0.data_ == arg1.data_ ) { - return std::make_tuple(0, 0, LlyrData(0x00)); - } else { - return std::make_tuple(1, 1, LlyrData(arg1.data_)); - } - } else { - output_->verbose( CALL_INFO, 0, 0, "Error: invalid select signal.\n" ); - exit(-1); - } - } else if( op == ROZ ) { - if( arg0.valid_ == 1 && arg0.data_[0] == 0 ) { - return std::make_tuple(1, 1, arg1.data_); - } else if( arg0.valid_ == 1 && arg0.data_[0] == 1 ) { - return std::make_tuple(0, 0, LlyrData(0xFF)); - } else { - output_->verbose( CALL_INFO, 0, 0, "Error: invalid select signal.\n" ); - exit(-1); - } - } else if( op == ROO ) { - if( arg0.valid_ == 1 && arg0.data_[0] == 1 ) { - return std::make_tuple(1, 1, arg1.data_); - } else if( arg0.valid_ == 1 && arg0.data_[0] == 0 ) { - return std::make_tuple(0, 0, LlyrData(0xFF)); - } else { - output_->verbose( CALL_INFO, 0, 0, "Error: invalid select signal.\n" ); - exit(-1); - } - } else if( op == ONEONAND ) { - if( arg0.valid_ == 1 && arg1.valid_ == 1 ) { - if( arg0.data_[0] && arg1.data_[0] ) { - return std::make_tuple(1, 0, LlyrData(0x01)); - } else { - return std::make_tuple(0, 0, LlyrData(0xFF)); - } - } else { - output_->verbose( CALL_INFO, 0, 0, "Error: invalid data signals.\n" ); - exit(-1); - } - } else if( op == GATED_ONE ) { - if( arg0.valid_ == 1 && arg1.valid_ == 1 ) { - if( arg0.data_[0] == 0 && arg1.data_[0] == 0 ) { - return std::make_tuple(1, 0, LlyrData(0x00)); - } else if( arg0.data_[0] == 0 && arg1.data_[0] == 1 ) { - return std::make_tuple(0, 0, LlyrData(0xFF)); - } else if( arg0.data_[0] == 1 && arg1.data_[0] == 0 ) { - return std::make_tuple(1, 0, LlyrData(0x00)); - } else if( arg0.data_[0] == 1 && arg1.data_[0] == 1 ) { - return std::make_tuple(1, 0, LlyrData(0x01)); - } else { - return std::make_tuple(0, 0, LlyrData(0xFF)); - } - } else { - output_->verbose( CALL_INFO, 0, 0, "Error: invalid data signals.\n" ); - exit(-1); - } - } else if( op == MERGE ) { - if( arg0.valid_ == 1 ) { - return std::make_tuple(1, 0, arg0.data_); - } else if( arg1.valid_ == 1 ) { - return std::make_tuple(1, 1, arg1.data_); - } else { - return std::make_tuple(0, 0, LlyrData(0xFF)); - } - } else if( op == FILTER ) { - if( arg0.data_ == 0 && arg1.data_ == 0 ) { - do_forward_ = 0; - return std::make_tuple(0, 0, LlyrData(0xFF)); - } else if( arg0.data_ == 1 && arg1.data_ == 1 ) { - do_forward_ = 0; - return std::make_tuple(0, 0, LlyrData(0x00)); - } else { - return std::make_tuple(1, 1, LlyrData(arg1.data_)); - } - } else if( op == REPEATER ) { - if( arg0.valid_ == 1 && arg0.data_[0] == 0 ) { - if( control_buffer_.size() > 0 ) { - output_->verbose( CALL_INFO, 0, 0, "RPTR AAAAAA\n" ); - return std::make_tuple(1, 2, control_buffer_.front()); - } else if( arg1.valid_ == 1 ) { - output_->verbose( CALL_INFO, 0, 0, "RPTR XXXXXXX\n" ); - control_buffer_.push(arg1.data_); - return std::make_tuple(1, 1, control_buffer_.front()); - } else { - return std::make_tuple(0, 2, 0xFF); - } - } else if( arg0.valid_ == 1 && arg0.data_[0] == 1 ) { - if( arg1.valid_ == 1 ) { - output_->verbose( CALL_INFO, 0, 0, "RPTR NNNNNNN\n" ); - control_buffer_.pop(); - control_buffer_.push(arg1.data_); - return std::make_tuple(0, 0, 0xFF); - } else { - output_->verbose( CALL_INFO, 0, 0, "RPTR QQQQQQQQ\n" ); - return std::make_tuple(0, 2, 0xFF); - } - } else { - output_->verbose( CALL_INFO, 0, 0, "Error-2: invalid data signals.\n" ); - exit(-1); - } - } else { - output_->verbose( CALL_INFO, 0, 0, "Error: could not find corresponding op-%" PRIu32 ".\n", op ); - exit(-1); - } - } - -private: - std::queue< LlyrData > control_buffer_; -}; - -class ControlConstProcessingElement : public ControlProcessingElement -{ -public: - ControlConstProcessingElement(opType op_binding, uint32_t processor_id, LlyrConfig* llyr_config, - QueueArgMap* arguments) : - ControlProcessingElement(op_binding, processor_id, llyr_config) - { - first_touch_ = 1; - - // iterate through the arguments and set initial queue values - for( auto it = arguments->begin(); it != arguments->end(); ++it ) { - auto retVal = input_queues_init_.emplace( it->first, it->second ); - if( retVal.second == false ) { - ///TODO - } - } - } - - virtual bool doCompute() - { - // TraceFunction trace(CALL_INFO_LONG); - output_->verbose(CALL_INFO, 4, 0, ">> Compute %s\n", getOpString(op_binding_).c_str()); - - uint64_t intResult = 0x0F; - - std::vector< QueueData > argList(3); - LlyrData retVal; - - if( output_->getVerboseLevel() >= 10 ) { - printInputQueue(); - printOutputQueue(); - } - - uint32_t num_ready = 0; - uint32_t num_inputs = 0; - uint32_t total_num_inputs = input_queues_->size(); - - // discover which of the input queues are used for the compute - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - num_inputs = num_inputs + 1; - } - } - - // FIXME check to see of there are any routing jobs -- should be able to do this without waiting to fire - bool routed = doRouting( total_num_inputs ); - - //check to see if all of the input queues have data - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - num_ready = num_ready + 1; - } - } - } - - pending_op_ = 0 | routed; - //if there are values waiting on any of the inputs (queue-0 is a const), this PE could still fire - for( uint32_t i = 1; i < total_num_inputs; ++i ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - pending_op_ = 1; - } else { - pending_op_ = 0 | routed; - } - } - - // if all inputs are available pull from queue and add to arg list - if( num_inputs == 0 || num_ready < num_inputs ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 "\n", num_inputs, num_ready); - return false; - } else { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 "\n", num_inputs, num_ready); - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - argList[i].valid_ = 1; - argList[i].data_ = input_queues_->at(i)->data_queue_->front(); - - if( op_binding_ != ROS ) { - input_queues_->at(i)->forwarded_ = 0; - input_queues_->at(i)->data_queue_->pop(); - } - } else { - argList[i].valid_ = 0; - } - } - } - } - - // If data tokens in output queue then simulation cannot end - pending_op_ = 1; - - // variables set in helper function - bool valid_return = 0; - HelperReturn tempReturn; - - switch( op_binding_ ) { - case ROS : - case RNE : - case FILTER : - tempReturn = helperFunction(op_binding_, argList[0], argList[1], argList[2]); - retVal = std::get<2>(tempReturn); - valid_return = std::get<0>(tempReturn); - break; - default : - output_->verbose(CALL_INFO, 0, 0, "Error: could not find corresponding op-%" PRIu32 ".\n", op_binding_); - exit(-1); - } - - output_->verbose(CALL_INFO, 32, 0, "intResult = %" PRIu64 "\n", intResult); - output_->verbose(CALL_INFO, 32, 0, "retVal = %s\n", retVal.to_string().c_str()); - - //for now push the result to all output queues that need this result - if( op_binding_ == ROS ) { - if( do_forward_ == 1 ) { - input_queues_->at(1)->data_queue_->pop(); - - for( uint32_t i = 0; i < output_queues_->size(); ++i ) { - if( *output_queues_->at(i)->routing_arg_ == "" ) { - output_queues_->at(i)->data_queue_->push(retVal); - } - } - } - } else if( op_binding_ == FILTER ) { - // this is so hacky -- need to preserve the const - input_queues_->at(0)->data_queue_->push(argList[0].data_); - if( valid_return == 1 ) { - for( uint32_t i = 0; i < output_queues_->size(); ++i ) { - if( *output_queues_->at(i)->routing_arg_ == "" ) { - output_queues_->at(i)->data_queue_->push(retVal); - } - } - } - } else if( op_binding_ == RNE ) { - if( argList[0].valid_ == 1 ) { - input_queues_->at(0)->data_queue_->push(argList[0].data_); - } - - if( valid_return == 1 ) { - for( uint32_t i = 0; i < output_queues_->size(); ++i ) { - if( *output_queues_->at(i)->routing_arg_ == "" ) { - output_queues_->at(i)->data_queue_->push(retVal); - } - } - } - } else { - if( valid_return == 1 ) { - for( uint32_t i = 0; i < output_queues_->size(); ++i ) { - if( *output_queues_->at(i)->routing_arg_ == "" ) { - output_queues_->at(i)->data_queue_->push(retVal); - } - } - } - } - - if( output_->getVerboseLevel() >= 10 ) { - printInputQueue(); - printOutputQueue(); - } - - return true; - }// doCompute - - virtual void inputQueueInit() - { - output_->verbose(CALL_INFO, 4, 0, ">> Fake Init Input Queue(%" PRIu32 "), Op %" PRIu32 " \n", - processor_id_, op_binding_ ); - - while( input_queues_->size() < input_queues_init_.size() ) { - LlyrQueue* tempQueue = new LlyrQueue; - tempQueue->forwarded_ = 0; - tempQueue->argument_ = 0; - tempQueue->routing_arg_ = new std::string(""); - tempQueue->data_queue_ = new std::queue< LlyrData >; - input_queues_->push_back(tempQueue); - } - - //TODO Need a more elegant way to initialize these queues - uint32_t queue_id = 0; - if( input_queues_init_.size() > 0 ) { - for( auto it = input_queues_init_.begin(); it != input_queues_init_.end(); ++it ) { - if( it->first == queue_id ) { - int64_t init_value = std::stoll(it->second); - LlyrData temp = LlyrData(init_value); - input_queues_->at(queue_id)->data_queue_->push(temp); - } - queue_id = queue_id + 1; - } - } - } - -private: - bool first_touch_; - std::map< uint32_t, Arg > input_queues_init_; - -};// ControlConstProcessingElement - -}//SST -}//Llyr - -#endif // _LOGIC_PE_H - diff --git a/src/sst/elements/llyr/pes/dummyPE.h b/src/sst/elements/llyr/pes/dummyPE.h deleted file mode 100644 index 749311b277..0000000000 --- a/src/sst/elements/llyr/pes/dummyPE.h +++ /dev/null @@ -1,48 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _DUMMY_PE_H -#define _DUMMY_PE_H - -#include "pes/processingElement.h" - -namespace SST { -namespace Llyr { - -/** - * @todo write docs - */ -class DummyProcessingElement : public ProcessingElement -{ -public: - DummyProcessingElement(opType op_binding, uint32_t processor_id, LlyrConfig* llyr_config) : - ProcessingElement(op_binding, processor_id, llyr_config) - { - } - - virtual bool doSend() { return 0; }; - virtual bool doReceive(LlyrData data) { return 0; }; - virtual bool doCompute() { return 0; }; - - //TODO for testing only - virtual void inputQueueInit() {}; - virtual void outputQueueInit() {}; - -}; - -}//SST -}//Llyr - -#endif // _DUMMY_PE_H diff --git a/src/sst/elements/llyr/pes/fpPE.h b/src/sst/elements/llyr/pes/fpPE.h deleted file mode 100644 index 1e27b36b30..0000000000 --- a/src/sst/elements/llyr/pes/fpPE.h +++ /dev/null @@ -1,246 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _FP_PE_H -#define _FP_PE_H - -#include -#include - -#include "pes/processingElement.h" - -namespace SST { -namespace Llyr { - -/** - * @todo write docs - */ -class FPProcessingElement : public ProcessingElement -{ -public: - FPProcessingElement(opType op_binding, uint32_t processor_id, LlyrConfig* llyr_config) : - ProcessingElement(op_binding, processor_id, llyr_config) - { - if( op_binding == FDIV ) { - latency_ = llyr_config->fp_div_latency_; - } else if( op_binding == FMUL ) { - latency_ = llyr_config->fp_mul_latency_; - } else { - latency_ = llyr_config->fp_latency_; - } - cycles_to_fire_ = latency_; - } - - virtual bool doReceive(LlyrData data) { return 0; }; - - virtual bool doCompute() - { - output_->verbose(CALL_INFO, 4, 0, ">> Compute %s\n", getOpString(op_binding_).c_str()); - - uint64_t intResult = 0x0F; - - std::vector< LlyrData > argList; - std::vector< double > convList; - LlyrData retVal; - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (0)\n"); - printInputQueue(); - printOutputQueue(); - } - - uint32_t num_ready = 0; - uint32_t num_inputs = 0; - uint32_t total_num_inputs = input_queues_->size(); - - // discover which of the input queues are used for the compute - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - num_inputs = num_inputs + 1; - } - } - - // FIXME check to see of there are any routing jobs -- should be able to do this without waiting to fire - bool routed = doRouting( total_num_inputs ); - - //check to see if all of the input queues have data - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - num_ready = num_ready + 1; - } - } - } - - //if there are values waiting on any of the inputs, this PE could still fire - if( num_ready < num_inputs && num_ready > 0) { - pending_op_ = 1; - } else { - pending_op_ = 0 | routed; - } - - // make sure all of the output queues have room for new data - for( uint32_t i = 0; i < output_queues_->size(); ++i) { - // std::cout << " Queue " << i << " Size " << output_queues_->at(i)->data_queue_->size() << " Max " << queue_depth_ << std::endl; - if( output_queues_->at(i)->data_queue_->size() >= queue_depth_ && *output_queues_->at(i)->routing_arg_ == "" ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 " -- No room in output queue %" PRIu32 ", cannot fire\n", num_inputs, num_ready, i); - return false; - } - } - - //if all inputs are available pull from queue and add to arg list - if( num_inputs == 0 || num_ready < num_inputs ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 " Fire %" PRIu16 "\n", num_inputs, num_ready, cycles_to_fire_); - return false; - } else if( cycles_to_fire_ > 0 ) { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 " Fire %" PRIu16 "\n", num_inputs, num_ready, cycles_to_fire_); - cycles_to_fire_ = cycles_to_fire_ - 1; - pending_op_ = 1; - return false; - } else { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 " Fire %" PRIu16 "\n", num_inputs, num_ready, cycles_to_fire_); - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - argList.push_back(input_queues_->at(i)->data_queue_->front()); - input_queues_->at(i)->forwarded_ = 0; - input_queues_->at(i)->data_queue_->pop(); - } - } - cycles_to_fire_ = latency_; - } - - // If data tokens in output queue then simulation cannot end - pending_op_ = 1; - - //need to convert from the raw bits to floating point - for(auto it = argList.begin() ; it != argList.end(); ++it ) { - double fpResult = bits_to_double(*it); - convList.push_back(fpResult); - } - - double fpResult; - std::stringstream dataOut; - switch( op_binding_ ) { - case FADD : - fpResult = convList[0] + convList[1]; - dataOut << std::setprecision(std::numeric_limits::max_digits10) << fpResult << " = "; - dataOut << std::setprecision(std::numeric_limits::max_digits10) << convList[0] << " + " << convList[1]; - dataOut << std::endl; - break; - case FSUB : - fpResult = convList[0] - convList[1]; - dataOut << std::setprecision(std::numeric_limits::max_digits10) << fpResult << " = "; - dataOut << std::setprecision(std::numeric_limits::max_digits10) << convList[0] << " - " << convList[1]; - dataOut << std::endl; - break; - case FMUL : - fpResult = convList[0] * convList[1]; - dataOut << std::setprecision(std::numeric_limits::max_digits10) << fpResult << " = "; - dataOut << std::setprecision(std::numeric_limits::max_digits10) << convList[0] << " * " << convList[1]; - dataOut << std::endl; - break; - case FDIV : - fpResult = convList[0] / convList[1]; - dataOut << std::setprecision(std::numeric_limits::max_digits10) << fpResult << " = "; - dataOut << std::setprecision(std::numeric_limits::max_digits10) << convList[0] << " / " << convList[1]; - dataOut << std::endl; - break; - default : - output_->verbose(CALL_INFO, 0, 0, "Error: could not find corresponding op-%" PRIu32 ".\n", op_binding_); - exit(-1); - } - output_->verbose(CALL_INFO, 32, 0, "%s\n", dataOut.str().c_str()); - - //convert the fp value back to raw bits for storage - retVal = LlyrData(fp_to_bits(&fpResult)); - - output_->verbose(CALL_INFO, 32, 0, "intResult = %f\n", fpResult); - output_->verbose(CALL_INFO, 32, 0, "intResult = %" PRIu64 "\n", intResult); - output_->verbose(CALL_INFO, 32, 0, "retVal = %s\n", retVal.to_string().c_str()); - - //for now push the result to all output queues - for( uint32_t i = 0; i < output_queues_->size(); ++i) { - output_queues_->at(i)->data_queue_->push(retVal); - } - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (1)\n"); - printInputQueue(); - printOutputQueue(); - } - - return true; - } - - //TODO for testing only - virtual void inputQueueInit() {}; - virtual void outputQueueInit() {}; - -private: - //helper to convert from bitset to float - float bits_to_float( std::bitset valIn ) - { - const auto newValue = valIn.to_ullong(); - constexpr auto size = sizeof(float); - uint8_t fpBuffer[size] = {}; - std::memcpy(fpBuffer, std::addressof(newValue), size); - - float fpResult; - std::memcpy(std::addressof(fpResult), fpBuffer, size); - - return fpResult; - } - - //helper to convert from bitset to double - double bits_to_double( std::bitset valIn ) - { - const auto newValue = valIn.to_ullong(); - constexpr auto size = sizeof(double); - uint8_t fpBuffer[size] = {}; - std::memcpy(fpBuffer, std::addressof(newValue), size); - - double fpResult; - std::memcpy(std::addressof(fpResult), fpBuffer, size); - - return fpResult; - } - - //helper to convert from fp to bitset - template - std::bitset fp_to_bits( T* fpIn ) - { - uint64_t intResult = 0; - constexpr auto size = sizeof(T); - uint8_t bufferA[size] = {}; - - std::memcpy(bufferA, std::addressof(*fpIn), size); - std::memcpy(std::addressof(intResult), bufferA, size); - std::bitset myBits = std::bitset(intResult); - - return myBits; - } - - //helper for debugging -- convert bitset to string - std::string bits_to_string( std::bitset valIn ) - { - return valIn.to_string(); - } - -}; - -}//SST -}//Llyr - -#endif // _FP_PE_H diff --git a/src/sst/elements/llyr/pes/intPE.h b/src/sst/elements/llyr/pes/intPE.h deleted file mode 100644 index 47f463e7ab..0000000000 --- a/src/sst/elements/llyr/pes/intPE.h +++ /dev/null @@ -1,657 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _INT_PE_H -#define _INT_PE_H - -#include "pes/processingElement.h" - -namespace SST { -namespace Llyr { - -/** - * @todo write docs - */ -class IntProcessingElement : public ProcessingElement -{ -public: - IntProcessingElement(opType op_binding, uint32_t processor_id, LlyrConfig* llyr_config) : - ProcessingElement(op_binding, processor_id, llyr_config) - { - if( op_binding == DIV ) { - latency_ = llyr_config->int_div_latency_; - } else { - latency_ = llyr_config->int_latency_; - } - cycles_to_fire_ = latency_; - } - - virtual bool doReceive(LlyrData data) { return 0; }; - - virtual bool doCompute() - { - output_->verbose(CALL_INFO, 4, 0, ">> Compute %s\n", getOpString(op_binding_).c_str()); - - uint64_t intResult = 0x0F; - - std::vector< LlyrData > argList; - LlyrData retVal; - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (0)\n"); - printInputQueue(); - printOutputQueue(); - } - - uint32_t num_ready = 0; - uint32_t num_inputs = 0; - uint32_t total_num_inputs = input_queues_->size(); - - // discover which of the input queues are used for the compute - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - num_inputs = num_inputs + 1; - } - } - - // FIXME check to see of there are any routing jobs -- should be able to do this without waiting to fire - bool routed = doRouting( total_num_inputs ); - - //check to see if all of the input queues have data - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - num_ready = num_ready + 1; - } - } - } - - //if there are values waiting on any of the inputs, this PE could still fire - if( num_ready < num_inputs && num_ready > 0) { - pending_op_ = 1; - } else { - pending_op_ = 0 | routed; - } - - // make sure all of the output queues have room for new data - for( uint32_t i = 0; i < output_queues_->size(); ++i) { - // std::cout << " Queue " << i << " Size " << output_queues_->at(i)->data_queue_->size() << " Max " << queue_depth_ << std::endl; - if( output_queues_->at(i)->data_queue_->size() >= queue_depth_ && *output_queues_->at(i)->routing_arg_ == "" ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 " -- No room in output queue %" PRIu32 ", cannot fire\n", num_inputs, num_ready, i); - return false; - } - } - - //if all inputs are available pull from queue and add to arg list - if( num_inputs == 0 || num_ready < num_inputs ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 " Fire %" PRIu16 "\n", num_inputs, num_ready, cycles_to_fire_); - return false; - } else if( cycles_to_fire_ > 0 ) { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 " Fire %" PRIu16 "\n", num_inputs, num_ready, cycles_to_fire_); - cycles_to_fire_ = cycles_to_fire_ - 1; - pending_op_ = 1; - return false; - } else { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 " Fire %" PRIu16 "\n", num_inputs, num_ready, cycles_to_fire_); - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - argList.push_back(input_queues_->at(i)->data_queue_->front()); - input_queues_->at(i)->forwarded_ = 0; - input_queues_->at(i)->data_queue_->pop(); - } - } - cycles_to_fire_ = latency_; - } - - // If data tokens in output queue then simulation cannot end - pending_op_ = 1; - - switch( op_binding_ ) { - case ADD : - intResult = argList[0].to_ullong() + argList[1].to_ullong(); - break; - case SUB : - intResult = argList[0].to_ullong() - argList[1].to_ullong(); - break; - case MUL : - intResult = argList[0].to_ullong() * argList[1].to_ullong(); - break; - case DIV : - intResult = argList[0].to_ullong() / argList[1].to_ullong(); - break; - case REM : - intResult = argList[0].to_ullong() % argList[1].to_ullong(); - break; - default : - output_->verbose(CALL_INFO, 0, 0, "Error: could not find corresponding op-%" PRIu32 ".\n", op_binding_); - exit(-1); - } - - retVal = LlyrData(intResult); - - output_->verbose(CALL_INFO, 32, 0, "intResult = %" PRIu64 "\n", intResult); - output_->verbose(CALL_INFO, 32, 0, "retVal = %s\n", retVal.to_string().c_str()); - - //for now push the result to all output queues that need this result -- assume if no route, then receives data - for( uint32_t i = 0; i < output_queues_->size(); ++i ) { - if( *output_queues_->at(i)->routing_arg_ == "" ) { - output_queues_->at(i)->data_queue_->push(retVal); - } - } - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (1)\n"); - printInputQueue(); - printOutputQueue(); - } - - output_->verbose(CALL_INFO, 4, 0, ">> Compute 0x%" PRIx32 " - COMPLETE\n", op_binding_); - return true; - } - - //TODO for testing only - virtual void inputQueueInit() {}; - virtual void outputQueueInit() {}; - -};// IntProcessingElement - -class IntConstProcessingElement : public IntProcessingElement -{ -public: - IntConstProcessingElement(opType op_binding, uint32_t processor_id, LlyrConfig* llyr_config, - QueueArgMap* arguments) : - IntProcessingElement(op_binding, processor_id, llyr_config) - { - if( op_binding == DIVCONST ) { - latency_ = llyr_config->int_div_latency_; - } else { - latency_ = llyr_config->int_latency_; - } - cycles_to_fire_ = latency_; - - // iterate through the arguments and set initial queue values - for( auto it = arguments->begin(); it != arguments->end(); ++it ) { - auto retVal = input_queues_init_.emplace( it->first, it->second ); - if( retVal.second == false ) { - ///TODO - } - } - } - - virtual bool doCompute() - { - output_->verbose(CALL_INFO, 4, 0, ">> Compute %s\n", getOpString(op_binding_).c_str()); - - uint64_t intResult = 0x0F; - - std::vector< LlyrData > argList; - LlyrData retVal; - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (0)\n"); - printInputQueue(); - printOutputQueue(); - } - - uint32_t num_ready = 0; - uint32_t num_inputs = 0; - uint32_t total_num_inputs = input_queues_->size(); - - // discover which of the input queues are used for the compute - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - num_inputs = num_inputs + 1; - } - } - - // FIXME check to see of there are any routing jobs -- should be able to do this without waiting to fire - bool routed = doRouting( total_num_inputs ); - - //check to see if all of the input queues have data -- this no longer assumes contiguous input args - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - num_ready = num_ready + 1; - } - } - } - - pending_op_ = 0 | routed; - //if there are values waiting on any of the inputs (queue-0 is a const), this PE could still fire - for( uint32_t i = 1; i < total_num_inputs; ++i ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - pending_op_ = 1; - } else { - pending_op_ = 0 | routed; - } - } - - std::cout << "++++++ Input Queue Size: " << input_queues_->at(0)->data_queue_->size(); - std::cout << ", Num Inputs: " << num_inputs; - std::cout << ", Num Ready: " << num_ready << std::endl; - - // make sure all of the output queues have room for new data - for( uint32_t i = 0; i < output_queues_->size(); ++i) { - // std::cout << " Queue " << i << " Size " << output_queues_->at(i)->data_queue_->size() << " Max " << queue_depth_ << std::endl; - if( output_queues_->at(i)->data_queue_->size() >= queue_depth_ && *output_queues_->at(i)->routing_arg_ == "" ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 " -- No room in output queue %" PRIu32 ", cannot fire\n", num_inputs, num_ready, i); - return false; - } - } - - //if all inputs are available pull from queue and add to arg list - if( num_inputs == 0 || num_ready < num_inputs ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 " Fire %" PRIu16 "\n", num_inputs, num_ready, cycles_to_fire_); - return false; - } else if( cycles_to_fire_ > 0 ) { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 " Fire %" PRIu16 "\n", num_inputs, num_ready, cycles_to_fire_); - cycles_to_fire_ = cycles_to_fire_ - 1; - pending_op_ = 1; - return false; - } else { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 " Fire %" PRIu16 "\n", num_inputs, num_ready, cycles_to_fire_); - for( uint32_t i = 0; i < total_num_inputs; ++i ) { - if( input_queues_->at(i)->argument_ > -1 ) { - argList.push_back(input_queues_->at(i)->data_queue_->front()); - input_queues_->at(i)->forwarded_ = 0; - input_queues_->at(i)->data_queue_->pop(); - } - } - cycles_to_fire_ = latency_; - } - - // If data tokens in output queue then simulation cannot end - pending_op_ = 1; - - // first queue should be const, so save for later - input_queues_->at(0)->data_queue_->push(LlyrData(argList[0].to_ullong())); - - switch( op_binding_ ) { - case ADDCONST : - intResult = argList[0].to_ullong() + argList[1].to_ullong(); - break; - case SUBCONST : - intResult = argList[0].to_ullong() - argList[1].to_ullong(); - break; - case MULCONST : - intResult = argList[0].to_ullong() * argList[1].to_ullong(); - break; - case DIVCONST : - intResult = argList[0].to_ullong() / argList[1].to_ullong(); - break; - case REMCONST : - intResult = argList[0].to_ullong() % argList[1].to_ullong(); - break; - default : - output_->verbose(CALL_INFO, 0, 0, "Error: could not find corresponding op-%" PRIu32 ".\n", op_binding_); - exit(-1); - } - - retVal = LlyrData(intResult); - - output_->verbose(CALL_INFO, 32, 0, "intResult = %" PRIu64 "\n", intResult); - output_->verbose(CALL_INFO, 32, 0, "retVal = %s\n", retVal.to_string().c_str()); - - for( uint32_t i = 0; i < output_queues_->size(); ++i ) { - if( *output_queues_->at(i)->routing_arg_ == "" ) { - output_queues_->at(i)->data_queue_->push(retVal); - } - } - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (1)\n"); - printInputQueue(); - printOutputQueue(); - } - - return true; - }// doCompute - - virtual void inputQueueInit() - { - output_->verbose(CALL_INFO, 4, 0, ">> Fake Init Input Queue(%" PRIu32 "), Op %" PRIu32 " \n", - processor_id_, op_binding_ ); - - while( input_queues_->size() < input_queues_init_.size() ) { - LlyrQueue* tempQueue = new LlyrQueue; - tempQueue->forwarded_ = 0; - tempQueue->argument_ = 0; //TODO all args are consts right now, should change to -1 - tempQueue->routing_arg_ = new std::string(""); - tempQueue->data_queue_ = new std::queue< LlyrData >; - input_queues_->push_back(tempQueue); - } - - //TODO Need a more elegant way to initialize these queues - uint32_t queue_id = 0; - if( input_queues_init_.size() > 0 ) { - for( auto it = input_queues_init_.begin(); it != input_queues_init_.end(); ++it ) { - if( it->first == queue_id ) { - int64_t init_value = std::stoll(it->second); - LlyrData temp = LlyrData(init_value); - input_queues_->at(queue_id)->data_queue_->push(temp); - } - queue_id = queue_id + 1; - } - } - } - -private: - std::map< uint32_t, Arg > input_queues_init_; - -};// IntConstProcessingElement - -class AdvIntProcessingElement : public IntProcessingElement -{ -public: - AdvIntProcessingElement(opType op_binding, uint32_t processor_id, LlyrConfig* llyr_config, - QueueArgMap* arguments) : - IntProcessingElement(op_binding, processor_id, llyr_config) - { - latency_ = llyr_config->int_latency_; - cycles_to_fire_ = latency_; - - // iterate through the arguments and set initial queue values - for( auto it = arguments->begin(); it != arguments->end(); ++it ) { - std::cout << "[AdvIntProcessingElement]"; - std::cout << "input_queues_init_ -- "; - std::cout << " queue: " << it->first; - std::cout << " arg: " << it->second; - std::cout << std::endl; - - auto retVal = input_queues_init_.emplace( it->first, it->second ); - if( retVal.second == false ) { - ///TODO - } - } - - triggered_ = 0; - initialized_ = 0; - } - - virtual bool doCompute() - { - output_->verbose(CALL_INFO, 4, 0, ">> Compute %s\n", getOpString(op_binding_).c_str()); - - uint64_t intResult = 0x0F; - - std::vector< LlyrData > argList; - LlyrData retVal; - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (0)\n"); - printInputQueue(); - printOutputQueue(); - } - - uint32_t num_ready = 0; - uint32_t num_inputs = 0; - uint32_t total_num_inputs = input_queues_->size(); - - // discover which of the input queues are used for the compute - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - num_inputs = num_inputs + 1; - } - } - - // buffer the initial values for restart - if( num_inputs > 2 && initialized_ == 0 ) { - initialized_ = 1; - init0_ = input_queues_->at(0)->data_queue_->front(); - init1_ = input_queues_->at(1)->data_queue_->front(); - } - - // and on the sync reset for inc - if( op_binding_ == INC_RST && total_num_inputs > 2 ) { - std::cout << "MMMOFODSOFSDOFDSDS" << std::endl; - input_queues_->at(2)->argument_ = -1; - } - - // FIXME check to see of there are any routing jobs -- should be able to do this without waiting to fire - bool routed = doRouting( total_num_inputs ); - - //check to see if all of the input queues have data - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - num_ready = num_ready + 1; - } - } - } - - // // if there is an extra non-routed input queue, this is a triggered PE - // if( num_inputs == 3 && input_queues_->at(2)->data_queue_->size() > 0 ) { - // triggered_ = 1; - // input_queues_->at(2)->data_queue_->pop(); - // - // // reset if necessary - // if( initialized_ == 1 ) { - // initialized_ = 2; - // } else { - // input_queues_->at(0)->data_queue_->push(init0_); - // input_queues_->at(1)->data_queue_->push(init1_); - // } - // } - // std::cout << std::flush; - // - // // tricksy to force event - // if( triggered_ == 1 ) { - // num_inputs = num_inputs - 1; - // } - - //if there are values waiting on any of the inputs, this PE could still fire - if( num_ready < num_inputs && num_ready > 0) { - pending_op_ = 1; - } else { - pending_op_ = 0 | routed; - } - - // make sure all of the output queues have room for new data - for( uint32_t i = 0; i < output_queues_->size(); ++i) { - // std::cout << " Queue " << i << " Size " << output_queues_->at(i)->data_queue_->size() << " Max " << queue_depth_ << std::endl; - if( output_queues_->at(i)->data_queue_->size() >= queue_depth_ && *output_queues_->at(i)->routing_arg_ == "" ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 " -- No room in output queue %" PRIu32 ", cannot fire\n", num_inputs, num_ready, i); - return false; - } - } - - // there is no way to purge the queue so assume that compute is done - std::cout << "++++++ Input Queue Size: " << input_queues_->at(0)->data_queue_->size(); - std::cout << ", Num Inputs: " << num_inputs; - std::cout << ", Num Ready: " << num_ready; - std::cout << ", Triggered: " << triggered_; - std::cout << ", Init: " << initialized_ << std::endl; - - //if all inputs are available pull from queue and add to arg list - if( num_inputs == 0 || num_ready < num_inputs ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 " Fire %" PRIu16 "\n", num_inputs, num_ready, cycles_to_fire_); - return false; - } else if( cycles_to_fire_ > 0 ) { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 " Fire %" PRIu16 "\n", num_inputs, num_ready, cycles_to_fire_); - cycles_to_fire_ = cycles_to_fire_ - 1; - pending_op_ = 1; - return false; - } else { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 " Fire %" PRIu16 "\n", num_inputs, num_ready, cycles_to_fire_); - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - argList.push_back(input_queues_->at(i)->data_queue_->front()); - input_queues_->at(i)->forwarded_ = 0; - input_queues_->at(i)->data_queue_->pop(); - } - } - cycles_to_fire_ = latency_; - } - - // If data tokens in output queue then simulation cannot end - pending_op_ = 1; - - if( op_binding_ == INC ) { - if( argList[0].to_ullong() <= argList[1].to_ullong() ) { - intResult = argList[0].to_ullong(); - input_queues_->at(0)->data_queue_->push(LlyrData(intResult + 1)); - input_queues_->at(1)->data_queue_->push(LlyrData(argList[1].to_ullong())); - - retVal = LlyrData(intResult); - - output_->verbose(CALL_INFO, 32, 0, "intResult = %" PRIu64 "\n", intResult); - output_->verbose(CALL_INFO, 32, 0, "retVal = %s\n", retVal.to_string().c_str()); - - // for now push the result to all output queues that need this result - for( uint32_t i = 0; i < output_queues_->size(); ++i ) { - if( *output_queues_->at(i)->routing_arg_ == "" ) { - output_queues_->at(i)->data_queue_->push(retVal); - } - } - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (1)\n"); - printInputQueue(); - printOutputQueue(); - } - } else if( triggered_ == 1 ){ - triggered_ = 0; - - retVal = LlyrData(intResult); - - output_->verbose(CALL_INFO, 32, 0, "intResult = %" PRIu64 "\n", intResult); - output_->verbose(CALL_INFO, 32, 0, "retVal = %s\n", retVal.to_string().c_str()); - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (1)\n"); - printInputQueue(); - printOutputQueue(); - } - } - } else if( op_binding_ == INC_RST ) { - if( argList[0].to_ullong() <= argList[1].to_ullong() ) { - intResult = argList[0].to_ullong(); - input_queues_->at(0)->data_queue_->push(LlyrData(intResult + 1)); - input_queues_->at(1)->data_queue_->push(LlyrData(argList[1].to_ullong())); - - retVal = LlyrData(intResult); - - output_->verbose(CALL_INFO, 32, 0, "intResult = %" PRIu64 "\n", intResult); - output_->verbose(CALL_INFO, 32, 0, "retVal = %s\n", retVal.to_string().c_str()); - - // for now push the result to all output queues that need this result - for( uint32_t i = 0; i < output_queues_->size(); ++i ) { - if( *output_queues_->at(i)->routing_arg_ == "" ) { - output_queues_->at(i)->data_queue_->push(retVal); - } - } - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (1)\n"); - printInputQueue(); - printOutputQueue(); - } - } - - std::cout << "total_num_inputs=" << total_num_inputs; - if( total_num_inputs > 2 ) - std::cout << " queue_size=" << input_queues_->at(2)->data_queue_->size(); - std::cout << std::endl; - if( total_num_inputs == 3 && input_queues_->at(2)->data_queue_->size() > 0 ) { - std::cout << "RESET ME PLEASE!!" << std::endl; - if( argList[0].to_ullong() > argList[1].to_ullong() ) { - std::cout << "RESET NOW!!!!!" << std::endl; - input_queues_->at(0)->data_queue_->push(LlyrData(init0_)); - input_queues_->at(1)->data_queue_->push(LlyrData(init1_)); - input_queues_->at(2)->data_queue_->pop(); - } - } - - } else if( op_binding_ == ACC ) { - // need to save the next accumulator value - LlyrData temp = input_queues_->at(0)->data_queue_->front(); - input_queues_->at(0)->data_queue_->pop(); -std::cout << "XXX " << temp.to_ullong() << " + " << argList[0].to_ullong() <at(0)->data_queue_->push(LlyrData(intResult)); - - retVal = LlyrData(intResult); - - output_->verbose(CALL_INFO, 32, 0, "intResult = %" PRIu64 "\n", intResult); - output_->verbose(CALL_INFO, 32, 0, "retVal = %s\n", retVal.to_string().c_str()); - - // for now push the result to all output queues that need this result - for( uint32_t i = 0; i < output_queues_->size(); ++i ) { - if( *output_queues_->at(i)->routing_arg_ == "" ) { - output_queues_->at(i)->data_queue_->push(retVal); - } - } - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (1)\n"); - printInputQueue(); - printOutputQueue(); - } - } else { - output_->verbose(CALL_INFO, 0, 0, "Error: could not find corresponding op-%" PRIu32 ".\n", op_binding_); - exit(-1); - } - - return true; - } - - virtual void inputQueueInit() - { - output_->verbose(CALL_INFO, 4, 0, ">> Fake Init Input Queue(%" PRIu32 "), Op %" PRIu32 " \n", - processor_id_, op_binding_ ); - - while( input_queues_->size() < input_queues_init_.size() ) { - LlyrQueue* tempQueue = new LlyrQueue; - tempQueue->forwarded_ = 0; - tempQueue->argument_ = 0; - tempQueue->routing_arg_ = new std::string(""); - tempQueue->data_queue_ = new std::queue< LlyrData >; - input_queues_->push_back(tempQueue); - } - - //TODO Need a more elegant way to initialize these queues - uint32_t queue_id = 0; - if( input_queues_init_.size() > 0 ) { - for( auto it = input_queues_init_.begin(); it != input_queues_init_.end(); ++it ) { - if( it->first == queue_id ) { - int64_t init_value = std::stoll(it->second); - LlyrData temp = LlyrData(init_value); - input_queues_->at(queue_id)->data_queue_->push(temp); - } - queue_id = queue_id + 1; - } - } - - // this is hacky but need to ignore queue-0 on the accumulator - if( op_binding_ == ACC ) { - input_queues_->at(0)->argument_ = -1; - } - } - -private: - uint16_t initialized_; - uint16_t triggered_; // 0-inactive, 1-active, 2-resetting - - LlyrData init0_; - LlyrData init1_; - - std::map< uint32_t, Arg > input_queues_init_; - -};// AdvIntProcessingElement - -}//SST -}//Llyr - -#endif // _INT_PE_H_H diff --git a/src/sst/elements/llyr/pes/loadPE.h b/src/sst/elements/llyr/pes/loadPE.h deleted file mode 100644 index d205be1fc8..0000000000 --- a/src/sst/elements/llyr/pes/loadPE.h +++ /dev/null @@ -1,366 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _LOAD_PE_H -#define _LOAD_PE_H - -#include "pes/processingElement.h" - -namespace SST { -namespace Llyr { - -/** - * @todo write docs - */ -class LoadProcessingElement : public ProcessingElement -{ -public: - LoadProcessingElement(opType op_binding, uint32_t processor_id, LlyrConfig* llyr_config) : - ProcessingElement(op_binding, processor_id, llyr_config) - { - } - - virtual bool doReceive(LlyrData data) - { - output_->verbose(CALL_INFO, 8, 0, ">> Receive 0x%" PRIx64 "\n", uint64_t(data.to_ullong())); - - //for now push the result to all output queues that need this result - for( uint32_t i = 0; i < output_queues_->size(); ++i ) { - if( *output_queues_->at(i)->routing_arg_ == "" ) { - output_queues_->at(i)->data_queue_->push(data); - } - } - - return true; - } - - virtual bool doCompute() - { - // TraceFunction trace(CALL_INFO_LONG); - output_->verbose(CALL_INFO, 4, 0, ">> Compute %s\n", getOpString(op_binding_).c_str()); - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (0)\n"); - printInputQueue(); - printOutputQueue(); - } - - std::vector< LlyrData > argList; - uint32_t num_ready = 0; - uint32_t num_inputs = 0; - uint32_t total_num_inputs = input_queues_->size(); - - // discover which of the input queues are used for the compute - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - num_inputs = num_inputs + 1; - } - } - - // FIXME check to see of there are any routing jobs -- should be able to do this without waiting to fire - bool routed = doRouting( total_num_inputs ); - - //check to see if all of the input queues have data - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - num_ready = num_ready + 1; - } - } - } - - //if there are values waiting on any of the inputs, this PE could still fire - if( num_ready < num_inputs && num_ready > 0) { - pending_op_ = 1; - } else { - pending_op_ = 0 | routed; - } - - // make sure all of the output queues have room for new data - for( uint32_t i = 0; i < output_queues_->size(); ++i) { - // std::cout << " Queue " << i << " Size " << output_queues_->at(i)->data_queue_->size() << " Max " << queue_depth_ << std::endl; - if( output_queues_->at(i)->data_queue_->size() >= queue_depth_ && *output_queues_->at(i)->routing_arg_ == "" ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 " -- No room in output queue %" PRIu32 ", cannot fire\n", num_inputs, num_ready, i); - return false; - } - } - - //if all inputs are available pull from queue and add to arg list - if( num_inputs == 0 || num_ready < num_inputs ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 "\n", num_inputs, num_ready); - return false; - } else { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 "\n", num_inputs, num_ready); - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - argList.push_back(input_queues_->at(i)->data_queue_->front()); - input_queues_->at(i)->forwarded_ = 0; - input_queues_->at(i)->data_queue_->pop(); - } - } - } - - //create the memory request - switch( op_binding_ ) { - case LD : - doLoad(argList[0].to_ullong()); - break; - case ALLOCA : - break; - default : - output_->fatal(CALL_INFO, -1, "Error: could not find corresponding op-%" PRIu32 ".\n", op_binding_); - exit(-1); - } - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (1)\n"); - printInputQueue(); - printOutputQueue(); - } - - return true; - } - - virtual void inputQueueInit() - { - output_->verbose(CALL_INFO, 4, 0, ">> Fake Init Input Queue(%" PRIu32 "), Op %" PRIu32 " \n", - processor_id_, op_binding_ ); - while( input_queues_->size() < input_queues_init_.size() ) { -// std::cout << "Num queues (a): " << input_queues_->size() << std::endl; - LlyrQueue* tempQueue = new LlyrQueue; - tempQueue->forwarded_ = 0; - tempQueue->argument_ = 0; - tempQueue->routing_arg_ = new std::string(""); - tempQueue->data_queue_ = new std::queue< LlyrData >; - input_queues_->push_back(tempQueue); -// std::cout << "Num queues (b): " << input_queues_->size() << std::endl; - } - - //TODO Need a more elegant way to initialize these queues - uint32_t queue_id = 0; - if( input_queues_init_.size() > 0 ) { - for( auto it = input_queues_init_.begin(); it != input_queues_init_.end(); ++it ) { - if( it->first == queue_id ) { - int64_t init_value = std::stoll(it->second); - LlyrData temp = LlyrData(init_value); - input_queues_->at(queue_id)->data_queue_->push(temp); - } - queue_id = queue_id + 1; - } - } else { - //for now assume that the address queue is on in-0 - uint64_t addr = llyr_config_->starting_addr_ + ( (processor_id_ - 1) * (Bit_Length / 8) ); - if( input_queues_->size() > 0 ) { - LlyrData temp = LlyrData(addr); - output_->verbose(CALL_INFO, 8, 0, "Init(%" PRIu32 ")::%" PRIx64 "::%" PRIu64 "\n", 0, addr, temp.to_ulong()); - input_queues_->at(0)->data_queue_->push(temp); - - addr = addr + (Bit_Length / 8); - } - } - } - - virtual void outputQueueInit() - { - output_->verbose(CALL_INFO, 4, 0, ">> Fake Init Output Queue(%" PRIu32 "), Op %" PRIu32 " \n", - processor_id_, op_binding_ ); - - //TODO Need a more elegant way to initialize these queues - uint32_t queue_id = 0; - if( output_queues_init_.size() > 0 ) { - for( auto it = output_queues_init_.begin(); it != output_queues_init_.end(); ++it ) { - if( it->first == queue_id ) { - int64_t init_value = std::stoll(it->second); - LlyrData temp = LlyrData(init_value); - output_queues_->at(queue_id)->data_queue_->push(temp); - } - queue_id = queue_id + 1; - } - } else { - //FIXME going to initialize all of the output queues - uint64_t addr = ( llyr_config_->starting_addr_ + ( (processor_id_ - 1) * (Bit_Length / 8) ) ) % 2; - - for( uint32_t i = 0; i < output_queues_->size(); ++i ) { - LlyrData temp = LlyrData(addr); - output_->verbose(CALL_INFO, 8, 0, "Init(%" PRIu32 ")::%" PRIx64 "::%" PRIu64 "\n", i, addr, temp.to_ulong()); - output_queues_->at(i)->data_queue_->push(temp); - } - } - }; - -protected: - QueueArgMap input_queues_init_; - QueueArgMap output_queues_init_; - - bool doLoad(uint64_t addr) - { - uint32_t targetPe = 0; - StandardMem::Request* req = new StandardMem::Read(addr, 8); - - output_->verbose(CALL_INFO, 4, 0, "Creating a load request (%" PRIu32 ") from address: %" PRIu64 "\n", uint32_t(req->getID()), addr); - - //find out where the load actually needs to go - auto it = output_queue_map_.begin(); - for( ; it != output_queue_map_.end(); ++it ) { - if( it->first == 0 ) { - targetPe = it->second->getProcessorId(); - break; - } - } - - //exit the simulation if there is not a corresponding destination - if( it == output_queue_map_.end() ) { - output_->fatal(CALL_INFO, -1, "Error: could not find corresponding PE.\n"); - exit(-1); - } - - LSEntry* tempEntry = new LSEntry( req->getID(), processor_id_, targetPe ); - lsqueue_->addEntry( tempEntry ); - - mem_interface_->send( req ); - - return 1; - } - -private: - - -};//END LoadProcessingElement - -class AdvLoadProcessingElement : public LoadProcessingElement -{ -public: - AdvLoadProcessingElement(opType op_binding, uint32_t processor_id, LlyrConfig* llyr_config, - QueueArgMap* arguments) : - LoadProcessingElement(op_binding, processor_id, llyr_config) - { - // iterate through the arguments and set initial queue values - for( auto it = arguments->begin(); it != arguments->end(); ++it ) { - std::cout << "[AdvLoadProcessingElement]"; - std::cout << "input_queues_init_ -- "; - std::cout << " queue: " << it->first; - std::cout << " arg: " << it->second; - std::cout << std::endl; - - auto retVal = input_queues_init_.emplace( it->first, it->second ); - if( retVal.second == false ) { - ///TODO - } - } - } - - virtual bool doCompute() - { - // TraceFunction trace(CALL_INFO_LONG); - output_->verbose(CALL_INFO, 4, 0, ">> Compute %s\n", getOpString(op_binding_).c_str()); - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (0)\n"); - printInputQueue(); - printOutputQueue(); - } - - std::vector< LlyrData > argList; - uint32_t num_ready = 0; - uint32_t num_inputs = 0; - uint32_t total_num_inputs = input_queues_->size(); - - // discover which of the input queues are used for the compute - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - num_inputs = num_inputs + 1; - } - } - - // FIXME check to see of there are any routing jobs -- should be able to do this without waiting to fire - bool routed = doRouting( total_num_inputs ); - - //check to see if all of the input queues have data - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - num_ready = num_ready + 1; - } - } - } - - pending_op_ = 0 | routed; - //if there are values waiting on any of the inputs (queue-0/-1 are not valid for stream_ld), this PE could still fire - for( uint32_t i = 2; i < total_num_inputs; ++i ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - pending_op_ = 1; - } else { - pending_op_ = 0 | routed; - } - } - - // make sure all of the output queues have room for new data - for( uint32_t i = 0; i < output_queues_->size(); ++i) { - // std::cout << " Queue " << i << " Size " << output_queues_->at(i)->data_queue_->size() << " Max " << queue_depth_ << std::endl; - if( output_queues_->at(i)->data_queue_->size() >= queue_depth_ && *output_queues_->at(i)->routing_arg_ == "" ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 " -- No room in output queue %" PRIu32 ", cannot fire\n", num_inputs, num_ready, i); - return false; - } - } - - //if all inputs are available pull from queue and add to arg list - if( num_inputs == 0 || num_ready < num_inputs ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 "\n", num_inputs, num_ready); - return false; - } else { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 "\n", num_inputs, num_ready); - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - argList.push_back(input_queues_->at(i)->data_queue_->front()); - input_queues_->at(i)->forwarded_ = 0; - input_queues_->at(i)->data_queue_->pop(); - } - } - } - - pending_op_ = 1; - - //create the memory request - if( op_binding_ == LDADDR ) { - doLoad(argList[0].to_ullong()); - } else if( op_binding_ == STREAM_LD ) { - if( argList[1].to_ullong() > 0 ) { - input_queues_->at(0)->data_queue_->push(LlyrData(argList[0].to_ullong() + (Bit_Length / 8) )); - input_queues_->at(1)->data_queue_->push(LlyrData(argList[1].to_ullong() - 1)); - doLoad(argList[0].to_ullong()); - } - } else { - output_->fatal(CALL_INFO, -1, "Error: could not find corresponding op-%" PRIu32 ".\n", op_binding_); - exit(-1); - } - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (1)\n"); - printInputQueue(); - printOutputQueue(); - } - - return true; - } - -private: - - -};// AdvLoadProcessingElement - -}//SST -}//Llyr - -#endif // _LOAD_PE_H diff --git a/src/sst/elements/llyr/pes/logicPE.h b/src/sst/elements/llyr/pes/logicPE.h deleted file mode 100644 index e8c109942c..0000000000 --- a/src/sst/elements/llyr/pes/logicPE.h +++ /dev/null @@ -1,490 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _LOGIC_PE_H -#define _LOGIC_PE_H - -#include "pes/processingElement.h" - -namespace SST { -namespace Llyr { - -/** - * @todo write docs - */ -class LogicProcessingElement : public ProcessingElement -{ -public: - LogicProcessingElement(opType op_binding, uint32_t processor_id, LlyrConfig* llyr_config) : - ProcessingElement(op_binding, processor_id, llyr_config) - { - latency_ = llyr_config->arith_latency_; - cycles_to_fire_ = latency_; - } - - virtual bool doReceive(LlyrData data) { return 0; }; - - virtual bool doCompute() - { - output_->verbose(CALL_INFO, 4, 0, ">> Compute %s\n", getOpString(op_binding_).c_str()); - - uint64_t intResult = 0x0F; - - std::vector< LlyrData > argList; - LlyrData retVal; - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (0)\n"); - printInputQueue(); - printOutputQueue(); - } - - uint32_t num_ready = 0; - uint32_t num_inputs = 0; - uint32_t total_num_inputs = input_queues_->size(); - - // discover which of the input queues are used for the compute - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - num_inputs = num_inputs + 1; - } - } - - // FIXME check to see of there are any routing jobs -- should be able to do this without waiting to fire - bool routed = doRouting( total_num_inputs ); - - //check to see if all of the input queues have data - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - num_ready = num_ready + 1; - } - } - } - - //if there are values waiting on any of the inputs, this PE could still fire - if( num_ready < num_inputs && num_ready > 0) { - pending_op_ = 1; - } else { - pending_op_ = 0 | routed; - } - - // make sure all of the output queues have room for new data - for( uint32_t i = 0; i < output_queues_->size(); ++i) { - // std::cout << " Queue " << i << " Size " << output_queues_->at(i)->data_queue_->size() << " Max " << queue_depth_ << std::endl; - if( output_queues_->at(i)->data_queue_->size() >= queue_depth_ && *output_queues_->at(i)->routing_arg_ == "" ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 " -- No room in output queue %" PRIu32 ", cannot fire\n", num_inputs, num_ready, i); - return false; - } - } - - //if all inputs are available pull from queue and add to arg list - if( num_inputs == 0 || num_ready < num_inputs ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 " Fire %" PRIu16 "\n", num_inputs, num_ready, cycles_to_fire_); - return false; - } else if( cycles_to_fire_ > 0 ) { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 " Fire %" PRIu16 "\n", num_inputs, num_ready, cycles_to_fire_); - cycles_to_fire_ = cycles_to_fire_ - 1; - pending_op_ = 1; - return false; - } else { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 " Fire %" PRIu16 "\n", num_inputs, num_ready, cycles_to_fire_); - for( uint32_t i = 0; i < total_num_inputs; ++i) { - std::cout << " HERE " << i << " total " << total_num_inputs << std::endl; - if( input_queues_->at(i)->argument_ > -1 ) { - argList.push_back(input_queues_->at(i)->data_queue_->front()); - std::cout << "Pushing (" << i << ") "; - std::cout << input_queues_->at(i)->data_queue_->front() << "\n"; - std::cout << "Pushed " << argList.front() << std::endl; - input_queues_->at(i)->forwarded_ = 0; - input_queues_->at(i)->data_queue_->pop(); - } - } - cycles_to_fire_ = latency_; - } - - // If data tokens in output queue then simulation cannot end - pending_op_ = 1; - - switch( op_binding_ ) { - case AND : - retVal = argList[0]; - retVal &= argList[1]; - break; - case OR : - retVal = argList[0]; - retVal |= argList[1]; - break; - case XOR : - retVal = argList[0]; - retVal ^= argList[1]; - break; - case NOT : - retVal = ~argList[0]; - break; - case SLL : - retVal = argList[0] << argList[1].to_ullong(); - break; - case SLR : - retVal = argList[0] >> argList[1].to_ullong(); - break; - case ROL : - retVal = (argList[0] << argList[1].to_ullong()) | (argList[0] >> (Bit_Length - argList[1].to_ullong())); - break; - case ROR : - retVal = (argList[0] >> argList[1].to_ullong()) | (argList[0] << (Bit_Length - argList[1].to_ullong())); - break; - case EQ : - case NE : - case UGT : - case UGE : - case SGT : - case SGE : - case ULT : - case ULE : - case SLT : - case SLE : - retVal = helperFunction(op_binding_, argList[0], argList[1]); - break; - default : - output_->verbose(CALL_INFO, 0, 0, "Error: could not find corresponding op-%" PRIu32 ".\n", op_binding_); - exit(-1); - } - - output_->verbose(CALL_INFO, 32, 0, "intResult = %" PRIu64 "\n", intResult); - output_->verbose(CALL_INFO, 32, 0, "retVal = %s\n", retVal.to_string().c_str()); - - //for now push the result to all output queues that need this result -- assume if no route, then receives data - for( uint32_t i = 0; i < output_queues_->size(); ++i ) { - if( *output_queues_->at(i)->routing_arg_ == "" ) { - output_queues_->at(i)->data_queue_->push(retVal); - } - } - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (1)\n"); - printInputQueue(); - printOutputQueue(); - } - - return true; - } - - //TODO for testing only - virtual void inputQueueInit() {}; - virtual void outputQueueInit() {}; - -protected: - LlyrData helperFunction( opType op, LlyrData arg0, LlyrData arg1 ) - { - //TODO might need this for data length conversion? -// std::bitset<8> x("10000010"); -// std::bitset<64> y; -// -// //if msb == 1, then it's negative and must sign extend -// y = y | std::bitset<64>(x.to_ullong()); -// if( x.test(x.size() - 1) == 1 ) { -// y |= 0xFFFFFFFFFFFFFF00; -// } -// -// std::cout << "x:" << x << "::" << x.size() << "\n"; -// std::cout << "y:" << y << "::" << y.size() << "\n"; -// -// int64_t boo = (int64_t)(y.to_ulong()); -// std::bitset<64> bitTestL = boo; - - std::cout << "LOGIC ARG[0]:" << arg0 << "::" << arg0.to_ullong() << std::endl; - std::cout << "LOGIC ARG[1]:" << arg1 << "::" << arg1.to_ullong() << std::endl; - - if( op == EQ || op == EQ_IMM ) { - if( arg0.to_ullong() == arg1.to_ullong() ) { - return 1; - } else { - return 0; - } - } - else if( op == NE ) { - if( arg0.to_ullong() != arg1.to_ullong() ) { - return 1; - } else { - return 0; - } - } - else if( op == UGT || op == UGT_IMM ) { - if( arg0.to_ullong() > arg1.to_ullong() ) { - return 1; - } else { - return 0; - } - } - else if( op == UGE || op == UGE_IMM ) { - if( arg0.to_ullong() >= arg1.to_ullong() ) { - return 1; - } else { - return 0; - } - } - else if( op == ULT ) { - if( arg0.to_ullong() < arg1.to_ullong() ) { - return 1; - } else { - return 0; - } - } - else if( op == ULE || op == ULE_IMM ) { - if( arg0.to_ullong() <= arg1.to_ullong() ) { - return 1; - } else { - return 0; - } - } - else if( op == SGT || op == SGT_IMM ) { - int64_t arg0s = (int64_t)(arg0.to_ullong()); - int64_t arg1s = (int64_t)(arg1.to_ullong()); - - if( arg0s > arg1s ) { - return 1; - } else { - return 0; - } - } - else if( op == SGE ) { - int64_t arg0s = (int64_t)(arg0.to_ullong()); - int64_t arg1s = (int64_t)(arg1.to_ullong()); - - if( arg0s >= arg1s ) { - return 1; - } else { - return 0; - } - } - else if( op == SLT || op == SLT_IMM ) { - int64_t arg0s = (int64_t)(arg0.to_ullong()); - int64_t arg1s = (int64_t)(arg1.to_ullong()); - - if( arg0s < arg1s ) { - return 1; - } else { - return 0; - } - } - else if( op == SLE ) { - int64_t arg0s = (int64_t)(arg0.to_ullong()); - int64_t arg1s = (int64_t)(arg1.to_ullong()); - - if( arg0s <= arg1s ) { - return 1; - } else { - return 0; - } - } - else { - output_->verbose(CALL_INFO, 0, 0, "Error: could not find corresponding op-%" PRIu32 ".\n", op_binding_); - exit(-1); - } - } - -}; //LogicProcessingElement - -class LogicConstProcessingElement : public LogicProcessingElement -{ -public: - LogicConstProcessingElement(opType op_binding, uint32_t processor_id, LlyrConfig* llyr_config, - QueueArgMap* arguments) : - LogicProcessingElement(op_binding, processor_id, llyr_config) - { - latency_ = llyr_config->arith_latency_; - cycles_to_fire_ = latency_; - - // iterate through the arguments and set initial queue values - for( auto it = arguments->begin(); it != arguments->end(); ++it ) { - auto retVal = input_queues_init_.emplace( it->first, it->second ); - if( retVal.second == false ) { - ///TODO - } - } - } - - virtual bool doCompute() - { - output_->verbose(CALL_INFO, 4, 0, ">> Compute %s\n", getOpString(op_binding_).c_str()); - - uint64_t intResult = 0x0F; - - std::vector< LlyrData > argList; - LlyrData retVal; - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (0)\n"); - printInputQueue(); - printOutputQueue(); - } - - uint32_t num_ready = 0; - uint32_t num_inputs = 0; - uint32_t total_num_inputs = input_queues_->size(); - - // discover which of the input queues are used for the compute - for( uint32_t i = 0; i < total_num_inputs; ++i ) { - if( input_queues_->at(i)->argument_ > -1 ) { - num_inputs = num_inputs + 1; - } - } - - // FIXME check to see of there are any routing jobs -- should be able to do this without waiting to fire - bool routed = doRouting( total_num_inputs ); - - //check to see if all of the input queues have data -- this no longer assumes contiguous input args - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - num_ready = num_ready + 1; - } - } - } - - - pending_op_ = 0 | routed; - //if there are values waiting on any of the inputs (queue-0 is a const), this PE could still fire - for( uint32_t i = 1; i < total_num_inputs; ++i ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - pending_op_ = 1; - } else { - pending_op_ = 0 | routed; - } - } - - // make sure all of the output queues have room for new data - for( uint32_t i = 0; i < output_queues_->size(); ++i) { - // std::cout << " Queue " << i << " Size " << output_queues_->at(i)->data_queue_->size() << " Max " << queue_depth_ << std::endl; - if( output_queues_->at(i)->data_queue_->size() >= queue_depth_ && *output_queues_->at(i)->routing_arg_ == "" ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 " -- No room in output queue %" PRIu32 ", cannot fire\n", num_inputs, num_ready, i); - return false; - } - } - - std::cout << "++++++ Input Queue Size: " << input_queues_->at(0)->data_queue_->size(); - std::cout << ", Num Inputs: " << num_inputs; - std::cout << ", Num Ready: " << num_ready << std::endl; - - //if all inputs are available pull from queue and add to arg list - if( num_inputs == 0 || num_ready < num_inputs ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 " Fire %" PRIu16 "\n", num_inputs, num_ready, cycles_to_fire_); - return false; - } else if( cycles_to_fire_ > 0 ) { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 " Fire %" PRIu16 "\n", num_inputs, num_ready, cycles_to_fire_); - cycles_to_fire_ = cycles_to_fire_ - 1; - pending_op_ = 1; - return false; - } else { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 " Fire %" PRIu16 "\n", num_inputs, num_ready, cycles_to_fire_); - // first queue should be const - for( uint32_t i = 0; i < total_num_inputs; ++i) { - std::cout << " HERE " << i << " total " << total_num_inputs << std::endl; - if( input_queues_->at(i)->argument_ > -1 ) { - argList.push_back(input_queues_->at(i)->data_queue_->front()); - std::cout << "Pushing (" << i << ") "; - std::cout << input_queues_->at(i)->data_queue_->front() << "\n"; - std::cout << "Pushed " << argList.front() << std::endl; - input_queues_->at(i)->forwarded_ = 0; - input_queues_->at(i)->data_queue_->pop(); - } - } - cycles_to_fire_ = latency_; - } - - // If data tokens in output queue then simulation cannot end - pending_op_ = 1; - - // first queue should be const, so save for later - input_queues_->at(0)->data_queue_->push(LlyrData(argList[0].to_ullong())); - - switch( op_binding_ ) { - case AND_IMM: - retVal = argList[0]; - retVal &= argList[1]; - break; - case OR_IMM : - retVal = argList[0]; - retVal |= argList[1]; - break; - case EQ_IMM : - case UGT_IMM : - case UGE_IMM : - case ULE_IMM : - case SGT_IMM : - case SLT_IMM : - retVal = helperFunction(op_binding_, argList[0], argList[1]); - break; - default : - output_->verbose(CALL_INFO, 0, 0, "Error: could not find corresponding op-%" PRIu32 ".\n", op_binding_); - exit(-1); - } - - output_->verbose(CALL_INFO, 32, 0, "intResult = %" PRIu64 "\n", intResult); - output_->verbose(CALL_INFO, 32, 0, "retVal = %s\n", retVal.to_string().c_str()); - - for( uint32_t i = 0; i < output_queues_->size(); ++i ) { - if( *output_queues_->at(i)->routing_arg_ == "" ) { - output_queues_->at(i)->data_queue_->push(retVal); - } - } - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (1)\n"); - printInputQueue(); - printOutputQueue(); - } - - return true; - }// doCompute - - virtual void inputQueueInit() - { - output_->verbose(CALL_INFO, 4, 0, ">> Fake Init Input Queue(%" PRIu32 "), Op %" PRIu32 " \n", - processor_id_, op_binding_ ); - - while( input_queues_->size() < input_queues_init_.size() ) { - LlyrQueue* tempQueue = new LlyrQueue; - tempQueue->forwarded_ = 0; - tempQueue->argument_ = 0; - tempQueue->routing_arg_ = new std::string(""); - tempQueue->data_queue_ = new std::queue< LlyrData >; - input_queues_->push_back(tempQueue); - } - - //TODO Need a more elegant way to initialize these queues - uint32_t queue_id = 0; - if( input_queues_init_.size() > 0 ) { - for( auto it = input_queues_init_.begin(); it != input_queues_init_.end(); ++it ) { - if( it->first == queue_id ) { - int64_t init_value = std::stoll(it->second); - LlyrData temp = LlyrData(init_value); - input_queues_->at(queue_id)->data_queue_->push(temp); - } - queue_id = queue_id + 1; - } - } - } - -private: - std::map< uint32_t, Arg > input_queues_init_; - -};// LogicConstProcessingElement - -}//SST -}//Llyr - -#endif // _LOGIC_PE_H diff --git a/src/sst/elements/llyr/pes/peList.h b/src/sst/elements/llyr/pes/peList.h deleted file mode 100644 index 5e30cfdc60..0000000000 --- a/src/sst/elements/llyr/pes/peList.h +++ /dev/null @@ -1,28 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _PE_LIST_H -#define _PE_LIST_H - -#include "complexPE.h" -#include "controlPE.h" -#include "dummyPE.h" -#include "fpPE.h" -#include "intPE.h" -#include "loadPE.h" -#include "logicPE.h" -#include "storePE.h" - -#endif //_PE_LIST_H diff --git a/src/sst/elements/llyr/pes/processingElement.h b/src/sst/elements/llyr/pes/processingElement.h deleted file mode 100644 index eea89f8293..0000000000 --- a/src/sst/elements/llyr/pes/processingElement.h +++ /dev/null @@ -1,481 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - - -#ifndef _LLYR_PE_H -#define _LLYR_PE_H - -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../graph/graph.h" -#include "../lsQueue.h" -#include "../llyrTypes.h" -#include "../llyrHelpers.h" - -namespace SST { -namespace Llyr { - -// Contains input/output queue and metadata -typedef struct alignas(uint64_t) { - bool forwarded_; - int32_t argument_; - std::string* routing_arg_; - std::queue< LlyrData >* data_queue_; -} LlyrQueue; - -typedef struct alignas(uint64_t) { - bool valid_; - LlyrData data_; -} QueueData; - -class ProcessingElement -{ -public: - ProcessingElement(opType op_binding, uint32_t processor_id, LlyrConfig* llyr_config) : - op_binding_(op_binding), processor_id_(processor_id), - pending_op_(0), llyr_config_(llyr_config) - { - //setup up i/o for messages - char prefix[256]; - sprintf(prefix, "[t=@t][ProcessingElement-%u]: ", processor_id_); - output_ = new SST::Output(prefix, llyr_config_->verbosity_, 0, Output::STDOUT); - - pending_op_ = 0; - lsqueue_ = llyr_config->lsqueue_; - mem_interface_ = llyr_config->mem_interface_; - - queue_depth_ = llyr_config->queueDepth_; - input_queues_= new std::vector< LlyrQueue* >(); - output_queues_ = new std::vector< LlyrQueue* >(); - } - - virtual ~ProcessingElement() {}; - - uint32_t bindInputQueue(ProcessingElement* src) - { - uint32_t queueId = input_queues_->size(); - - output_->verbose(CALL_INFO, 4, 0, ">> Binding Input Queue-%" PRIu32 " on PE-%" PRIu32 " to PE-%" PRIu32 "\n", - queueId, processor_id_, src->getProcessorId() ); - - auto retVal = input_queue_map_.emplace( queueId, src ); - if( retVal.second == false ) { - return 0; - } - - LlyrQueue* tempQueue = new LlyrQueue; - tempQueue->forwarded_ = 0; - tempQueue->argument_ = 0; - tempQueue->routing_arg_ = new std::string(""); - tempQueue->data_queue_ = new std::queue< LlyrData >; - input_queues_->push_back(tempQueue); - - return queueId; - } - - uint32_t bindInputQueue(ProcessingElement* src, uint32_t queueId, int32_t argument) - { - output_->verbose(CALL_INFO, 4, 0, ">> Binding Input Queue-%" PRIu32 " on PE-%" PRIu32 " to PE-%" PRIu32 "\n", - queueId, processor_id_, src->getProcessorId() ); - - auto retVal = input_queue_map_.emplace( queueId, src ); - if( retVal.second == false ) { - return 0; - } - - while( input_queues_->size() <= queueId ) { - LlyrQueue* tempQueue = new LlyrQueue; - tempQueue->forwarded_ = 0; - tempQueue->routing_arg_ = new std::string(""); - tempQueue->argument_ = 0; - tempQueue->data_queue_ = new std::queue< LlyrData >; - input_queues_->push_back(tempQueue); - } - - input_queues_->at(queueId)->argument_ = argument; - - return queueId; - } - - uint32_t bindInputQueue(ProcessingElement* src, uint32_t queueId, int32_t argument, std::string* routing_arg) - { - output_->verbose(CALL_INFO, 4, 0, ">> Binding Input Queue-%" PRIu32 " on PE-%" PRIu32 " to PE-%" PRIu32 "\n", - queueId, processor_id_, src->getProcessorId() ); - - auto retVal = input_queue_map_.emplace( queueId, src ); - if( retVal.second == false ) { - return 0; - } - - while( input_queues_->size() <= queueId ) { - LlyrQueue* tempQueue = new LlyrQueue; - tempQueue->forwarded_ = 0; - tempQueue->routing_arg_ = new std::string(""); - tempQueue->argument_ = 0; - tempQueue->data_queue_ = new std::queue< LlyrData >; - input_queues_->push_back(tempQueue); - } - - input_queues_->at(queueId)->argument_ = argument; - input_queues_->at(queueId)->routing_arg_ = routing_arg; - - return queueId; - } - - uint32_t bindOutputQueue(ProcessingElement* dst) - { - uint32_t queueId = output_queues_->size(); - - output_->verbose(CALL_INFO, 4, 0, ">> Binding Output Queue-%" PRIu32 " on PE-%" PRIu32 " to PE-%" PRIu32 "\n", - queueId, processor_id_, dst->getProcessorId() ); - - auto retVal = output_queue_map_.emplace( queueId, dst ); - if( retVal.second == false ) { - return 0; - } - - LlyrQueue* tempQueue = new LlyrQueue; - tempQueue->forwarded_ = 0; - tempQueue->argument_ = 0; - tempQueue->routing_arg_ = new std::string(""); - tempQueue->data_queue_ = new std::queue< LlyrData >; - output_queues_->push_back(tempQueue); - - return queueId; - } - - uint32_t bindOutputQueue(ProcessingElement* dst, uint32_t queueId) - { - output_->verbose(CALL_INFO, 4, 0, ">> Binding Output Queue-%" PRIu32 " on PE-%" PRIu32 " to PE-%" PRIu32 "\n", - queueId, processor_id_, dst->getProcessorId() ); - - auto retVal = output_queue_map_.emplace( queueId, dst ); - if( retVal.second == false ) { - return 0; - } - - while( output_queues_->size() <= queueId ) { - LlyrQueue* tempQueue = new LlyrQueue; - tempQueue->forwarded_ = 0; - tempQueue->argument_ = 0; - tempQueue->routing_arg_ = new std::string(""); - tempQueue->data_queue_ = new std::queue< LlyrData >; - output_queues_->push_back(tempQueue); - } - - return queueId; - } - - void setOutputQueueRoute(uint32_t queueID, std::string* routing_arg) - { - if( output_queues_->size() < queueID ) { - output_->fatal(CALL_INFO, -1, "Error: Output Size %" PRIu64 " Smaller Than ID(%" PRIu32 "\n", - output_queues_->size(), queueID); - } - - output_queues_->at(queueID)->routing_arg_ = routing_arg; - } - - void createInputQueues(uint32_t numQueues) - { - while( input_queues_->size() < numQueues ) { - LlyrQueue* tempQueue = new LlyrQueue; - tempQueue->forwarded_ = 0; - tempQueue->routing_arg_ = new std::string(""); - tempQueue->argument_ = 0; - tempQueue->data_queue_ = new std::queue< LlyrData >; - input_queues_->push_back(tempQueue); - } - - std::cout << "Node " << processor_id_ << " -- " << input_queues_->size() << " queues" << std::endl; - } - - void pushInputQueue(uint32_t id, uint64_t &inVal ) - { - LlyrData newValue = LlyrData(inVal); - input_queues_->at(id)->data_queue_->push(newValue); - } - - void pushInputQueue(uint32_t id, LlyrData &inVal ) - { - input_queues_->at(id)->data_queue_->push(inVal); - } - - int32_t getInputQueueId(uint32_t id) const - { - auto it = input_queue_map_.begin(); - for( ; it != input_queue_map_.end(); ++it ) { - if( it->second->getProcessorId() == id ) { - return it->first; - } - } - - return -1; - } - - int32_t getOutputQueueId(uint32_t id) const - { - auto it = output_queue_map_.begin(); - for( ; it != output_queue_map_.end(); ++it ) { - if( it->second->getProcessorId() == id ) { - return it->first; - } - } - - return -1; - } - - int32_t getQueueOutputProcBinding(ProcessingElement* pe) const - { - auto it = output_queue_map_.begin(); - for( ; it != output_queue_map_.end(); ++it ) { - if( it->second == pe ) { - return it->first; - } - } - - return -1; - } - - ProcessingElement* getProcInputQueueBinding(uint32_t id) const - { - auto it = input_queue_map_.begin(); - for( ; it != input_queue_map_.end(); ++it ) { - if( it->first == id ) { - return it->second; - } - } - - return NULL; - } - - int32_t getQueueInputProcBinding(ProcessingElement* pe) const - { - auto it = input_queue_map_.begin(); - for( ; it != input_queue_map_.end(); ++it ) { - if( it->second == pe ) { - return it->first; - } - } - - return -1; - } - - uint32_t getNumInputQueues() const - { - if( input_queues_ != nullptr) { - return input_queues_->size(); - } else { - return 0; - } - } - - uint32_t getNumOutputQueues() const - { - if( output_queues_ != nullptr) { - return output_queues_->size(); - } else { - return 0; - } - } - - bool checkInputArgs(std::string* arg_in) - { - for( auto iter = input_queues_->begin(); iter != input_queues_->end(); ++iter ) { - if( *(*iter)->routing_arg_ == *arg_in ) { - std::cout << "TT " << *(*iter)->routing_arg_ << " -- " << * arg_in <at(id)->data_queue_->size(); } - - void setOpBinding(opType binding) { op_binding_ = binding; } - opType getOpBinding() const { return op_binding_; } - - void setProcessorId(uint32_t id) { processor_id_ = id; } - uint32_t getProcessorId() const { return processor_id_; } - - bool getPendingOp() const { return pending_op_; } - - void printInputQueue() - { - for( uint32_t i = 0; i < input_queues_->size(); ++i ) { - std::cout << "[PE-" << processor_id_ << "] "; - std::cout << "i:" << i << "(" << input_queues_->at(i)->argument_ << ")"; - std::cout << ": " << input_queues_->at(i)->data_queue_->size(); - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - std::cout << ":" << input_queues_->at(i)->data_queue_->front().to_ullong() << ":" << input_queues_->at(i)->data_queue_->front() << "\n"; - } else { - std::cout << ":x" << ":x" << "\n"; - } - } - } - - void printOutputQueue() - { - for( uint32_t i = 0; i < output_queues_->size(); ++i ) { - std::cout << "[PE-" << processor_id_ << "] "; - std::cout << "o:" << i << "(" << output_queues_->at(i)->argument_ << ")"; - std::cout << ": " << output_queues_->at(i)->data_queue_->size(); - if( output_queues_->at(i)->data_queue_->size() > 0 ) { - std::cout << ":" << output_queues_->at(i)->data_queue_->front().to_ullong() << ":" << output_queues_->at(i)->data_queue_->front() << "\n"; - } else { - std::cout << ":x" << ":x" << "\n"; - } - } - } - - virtual bool doSend() - { - uint32_t queueId; - LlyrData sendVal; - ProcessingElement* dstPe; - - for(auto it = output_queue_map_.begin() ; it != output_queue_map_.end(); ++it ) { - queueId = it->first; - dstPe = it->second; - - if( output_queues_->at(queueId)->data_queue_->size() > 0 ) { - std::cout << " Input Queue Depth at PE-" << dstPe->getProcessorId(); - std::cout << "(" << queueId << ") " << dstPe->getInputQueueSize(dstPe->getInputQueueId(processor_id_)); - std::cout << ", max is " << queue_depth_ << std::endl; - if( dstPe->getInputQueueSize(dstPe->getInputQueueId(processor_id_)) < queue_depth_ ) { - output_->verbose(CALL_INFO, 8, 0, ">> Sending (%llu)...%" PRIu32 "-%" PRIu32 " to %" PRIu32 "\n", - output_queues_->at(queueId)->data_queue_->front().to_ullong(), processor_id_, queueId, - dstPe->getProcessorId()); - - sendVal = output_queues_->at(queueId)->data_queue_->front(); - dstPe->pushInputQueue(dstPe->getInputQueueId(processor_id_), sendVal); - output_queues_->at(queueId)->data_queue_->pop(); - } else { - output_->verbose(CALL_INFO, 8, 0, ">> Sending failed...%" PRIu32 "-%" PRIu32 " to %" PRIu32 "\n", - processor_id_, queueId, dstPe->getProcessorId()); - } - - // with back pressure, sometimes the simulation will end even if tokens are live - pending_op_ = 1; - } - } - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (2)\n"); - printInputQueue(); - printOutputQueue(); - } - - return true; - } - - virtual bool doReceive(LlyrData data) = 0; - virtual bool doCompute() = 0; - - //TODO for testing only - virtual void inputQueueInit() = 0; - virtual void outputQueueInit() = 0; - -protected: - opType op_binding_; - uint32_t processor_id_; - - uint16_t timeout_; - uint16_t latency_; - uint16_t cycles_to_fire_; - - // input and output queues per PE - uint32_t queue_depth_; - std::vector< LlyrQueue* >* input_queues_; - std::vector< LlyrQueue* >* output_queues_; - - // need to connect PEs to queues -- queue_id, src/dst - std::map< uint32_t, ProcessingElement* > input_queue_map_; - std::map< uint32_t, ProcessingElement* > output_queue_map_; - - // track outstanding L/S requests (passed from top-level) - LSQueue* lsqueue_; - - Interfaces::StandardMem* mem_interface_; - SST::Output* output_; - - // used to stall execution - waiting on mem/queues full - bool pending_op_; - - // bundle of configuration parameters - LlyrConfig* llyr_config_; - - // Make sure that anything that needs to be routed gets routed - virtual bool doRouting( uint32_t total_num_inputs ) - { - bool global_route = 0; - for( uint32_t i = 0; i < total_num_inputs; ++i) { - bool routed = 0; - const std::string rtr_arg = *input_queues_->at(i)->routing_arg_; - std::cout << "\trtr_arg " << i << " -- fwd " << rtr_arg << " (" << input_queues_->at(i)->forwarded_ << ")" << std::endl; - if( rtr_arg == "" || input_queues_->at(i)->forwarded_ == 1 ) { - std::cout << "continue" << std::endl; - continue; - } -printOutputQueue(); - std::cout << "num output queues " << output_queues_->size() << std::endl; - for( uint32_t j = 0; j < output_queues_->size(); ++j) { - std::cout << "output queue arg (" << j << ") " << *output_queues_->at(j)->routing_arg_ << std::endl; - if( *output_queues_->at(j)->routing_arg_ == rtr_arg && input_queues_->at(i)->data_queue_->size() > 0) { - std::cout << "Now I'm hereherehere_3 -- " << input_queues_->at(i)->data_queue_->front() << std::endl; - - routed = 1; - output_queues_->at(j)->data_queue_->push(input_queues_->at(i)->data_queue_->front()); - std::cout << "data type arg " << input_queues_->at(i)->argument_ << std::endl; - output_->verbose(CALL_INFO, 4, 0, "+Routing %s from %" PRIu32 "\n", rtr_arg.c_str(), i); - } - } -printOutputQueue(); - if( routed == 1 ) { - if( input_queues_->at(i)->argument_ == -1 ) { - input_queues_->at(i)->data_queue_->pop(); - } else { - input_queues_->at(i)->forwarded_ = 1; - } - } - - global_route = global_route | routed; - } - - return global_route; - } - -private: - -}; - - -}//SST -}//Llyr - -#endif //_LLYR_PE_H - diff --git a/src/sst/elements/llyr/pes/storePE.h b/src/sst/elements/llyr/pes/storePE.h deleted file mode 100644 index c701e9d883..0000000000 --- a/src/sst/elements/llyr/pes/storePE.h +++ /dev/null @@ -1,352 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _STORE_PE_H -#define _STORE_PE_H - -#include - -#include "pes/processingElement.h" - -namespace SST { -namespace Llyr { - -/** - * @todo write docs - */ -class StoreProcessingElement : public ProcessingElement -{ -public: - StoreProcessingElement(opType op_binding, uint32_t processor_id, LlyrConfig* llyr_config) : - ProcessingElement(op_binding, processor_id, llyr_config) - { - } - - virtual bool doReceive(LlyrData data) - { - output_->verbose(CALL_INFO, 8, 0, ">> Receive 0x%" PRIx64 "\n", uint64_t(data.to_ullong())); - - //for now push the result to all output queues that need this result - for( uint32_t i = 0; i < output_queues_->size(); ++i ) { - if( *output_queues_->at(i)->routing_arg_ == "" ) { - output_queues_->at(i)->data_queue_->push(data); - } - } - - return true; - } - - virtual bool doCompute() - { -// output_->verbose(CALL_INFO, 4, 0, ">> Compute 0x%" PRIx32 "\n", op_binding_); - output_->verbose(CALL_INFO, 4, 0, ">> Compute %s\n", getOpString(op_binding_).c_str()); - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (0)\n"); - printInputQueue(); - printOutputQueue(); - } - - std::vector< LlyrData > argList; - uint32_t num_ready = 0; - uint32_t num_inputs = 0; - uint32_t total_num_inputs = input_queues_->size(); - - // discover which of the input queues are used for the compute - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - num_inputs = num_inputs + 1; - } - } - - // FIXME check to see of there are any routing jobs -- should be able to do this without waiting to fire - bool routed = doRouting( total_num_inputs ); - - //check to see if all of the input queues have data - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - num_ready = num_ready + 1; - } - } - } - - //if there are values waiting on any of the inputs, this PE could still fire - if( num_ready < num_inputs && num_ready > 0) { - pending_op_ = 1; - } else { - pending_op_ = 0 | routed; - } - - // make sure all of the output queues have room for new data - for( uint32_t i = 0; i < output_queues_->size(); ++i) { - // std::cout << " Queue " << i << " Size " << output_queues_->at(i)->data_queue_->size() << " Max " << queue_depth_ << std::endl; - if( output_queues_->at(i)->data_queue_->size() >= queue_depth_ && *output_queues_->at(i)->routing_arg_ == "" ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 " -- No room in output queue %" PRIu32 ", cannot fire\n", num_inputs, num_ready, i); - return false; - } - } - - //if all inputs are available pull from queue and add to arg list - if( num_inputs == 0 || num_ready < num_inputs ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 "\n", num_inputs, num_ready); - return false; - } else { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 "\n", num_inputs, num_ready); - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - argList.push_back(input_queues_->at(i)->data_queue_->front()); - input_queues_->at(i)->forwarded_ = 0; - input_queues_->at(i)->data_queue_->pop(); - } - } - } - - doStore(argList[0].to_ullong(), argList[1].to_ullong()); - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (1)\n"); - printInputQueue(); - printOutputQueue(); - } - - return true; - } - - virtual void inputQueueInit() - { - output_->verbose(CALL_INFO, 4, 0, ">> Fake Init Input Queue(%" PRIu32 "), Op %" PRIu32 " \n", - processor_id_, op_binding_ ); - - while( input_queues_->size() < input_queues_init_.size() ) { - LlyrQueue* tempQueue = new LlyrQueue; - tempQueue->forwarded_ = 0; - tempQueue->argument_ = 0; - tempQueue->routing_arg_ = new std::string(""); - tempQueue->data_queue_ = new std::queue< LlyrData >; - input_queues_->push_back(tempQueue); - } - - //TODO Need a more elegant way to initialize these queues - uint32_t queue_id = 0; - if( input_queues_init_.size() > 0 ) { - for( auto it = input_queues_init_.begin(); it != input_queues_init_.end(); ++it ) { - if( it->first == queue_id ) { - int64_t init_value = std::stoll(it->second); - LlyrData temp = LlyrData(init_value); - input_queues_->at(queue_id)->argument_ = 0; - input_queues_->at(queue_id)->data_queue_->push(temp); - } - queue_id = queue_id + 1; - } - } else { - //for now assume that the address queue is on in-0 - uint64_t addr = llyr_config_->starting_addr_ + ( (processor_id_ - 1) * (Bit_Length / 8) ); - if( input_queues_->size() > 0 ) { - LlyrData temp = LlyrData(addr); - output_->verbose(CALL_INFO, 8, 0, "Init(%" PRIu32 ")::%" PRIx64 "::%" PRIu64 "\n", 0, addr, temp.to_ulong()); - input_queues_->at(0)->data_queue_->push(temp); - - addr = addr + (Bit_Length / 8); - } - } - } - - virtual void outputQueueInit() {}; - -protected: - std::map< uint32_t, Arg > input_queues_init_; - - bool doStore(uint64_t addr, LlyrData data) - { - uint32_t targetPe = processor_id_; - - const auto newValue = data.to_ullong(); - - constexpr auto size = sizeof(uint64_t); - uint8_t buffer[size] = {}; - std::memcpy(buffer, std::addressof(newValue), size); - - output_->verbose(CALL_INFO, 64, 0, "llyr = %s\n", data.to_string().c_str()); - output_->verbose(CALL_INFO, 64, 0, "conv = %llu\n", newValue); - - if( output_->getVerboseLevel() >= 64 ) { - std::stringstream dataOut; - for(uint32_t i = 0; i < size; ++i) { - dataOut << static_cast< uint16_t >(buffer[i]) << ", "; - } - output_->verbose(CALL_INFO, 64, 0, "%s\n", dataOut.str().c_str()); - } - - std::vector< uint8_t > payload(8); - memcpy( std::addressof(payload[0]), std::addressof(newValue), size ); - - if( output_->getVerboseLevel() >= 64 ) { - std::stringstream dataOut; - for( auto it = payload.begin(); it != payload.end(); ++it ) { - dataOut << static_cast< uint16_t >(*it) << ", "; - } - output_->verbose(CALL_INFO, 64, 0, "%s\n", dataOut.str().c_str()); - } - - StandardMem::Request* req = new StandardMem::Write(addr, 8, payload); - output_->verbose(CALL_INFO, 4, 0, "Creating a store request (%" PRIu32 ") for %llu at address: %" PRIu64 "\n", uint32_t(req->getID()), newValue, addr); - - LSEntry* tempEntry = new LSEntry( req->getID(), processor_id_, targetPe ); - lsqueue_->addEntry( tempEntry ); - - mem_interface_->send( req ); - - return 1; - } - -private: - -};// END StoreProcessingElement - -class AdvStoreProcessingElement : public StoreProcessingElement -{ -public: - AdvStoreProcessingElement(opType op_binding, uint32_t processor_id, LlyrConfig* llyr_config, - QueueArgMap* arguments) : - StoreProcessingElement(op_binding, processor_id, llyr_config) - { - // iterate through the arguments and set initial queue values - for( auto it = arguments->begin(); it != arguments->end(); ++it ) { - - std::cout << "[AdvStoreProcessingElement]"; - std::cout << "input_queues_init_ -- "; - std::cout << " queue: " << it->first; - std::cout << " arg: " << it->second; - std::cout << std::endl; - - auto retVal = input_queues_init_.emplace( it->first, it->second ); - if( retVal.second == false ) { - ///TODO - } - } - } - - virtual bool doCompute() - { - output_->verbose(CALL_INFO, 4, 0, ">> Compute %s\n", getOpString(op_binding_).c_str()); - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (0)\n"); - printInputQueue(); - printOutputQueue(); - } - - std::vector< QueueData > argList(3); - uint32_t num_ready = 0; - uint32_t num_inputs = 0; - uint32_t total_num_inputs = input_queues_->size(); - - // discover which of the input queues are used for the compute - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - num_inputs = num_inputs + 1; - } - } - - // FIXME check to see of there are any routing jobs -- should be able to do this without waiting to fire - bool routed = doRouting( total_num_inputs ); - - //check to see if all of the input queues have data - for( uint32_t i = 0; i < total_num_inputs; ++i ) { - if( input_queues_->at(i)->argument_ > -1 ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - num_ready = num_ready + 1; - } - } - } - - pending_op_ = 0 | routed; - //if there are values waiting on any of the inputs (queue-0/-1 are not valid for stream_st), this PE could still fire - for( uint32_t i = 2; i < total_num_inputs; ++i ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - pending_op_ = 1; - } else { - pending_op_ = 0 | routed; - } - } - - // make sure all of the output queues have room for new data - for( uint32_t i = 0; i < output_queues_->size(); ++i) { - // std::cout << " Queue " << i << " Size " << output_queues_->at(i)->data_queue_->size() << " Max " << queue_depth_ << std::endl; - if( output_queues_->at(i)->data_queue_->size() >= queue_depth_ && *output_queues_->at(i)->routing_arg_ == "" ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 " -- No room in output queue %" PRIu32 ", cannot fire\n", num_inputs, num_ready, i); - return false; - } - } - - //if all inputs are available pull from queue and add to arg list - if( num_inputs == 0 || num_ready < num_inputs ) { - output_->verbose(CALL_INFO, 4, 0, "-Inputs %" PRIu32 " Ready %" PRIu32 "\n", num_inputs, num_ready); - return false; - } else { - output_->verbose(CALL_INFO, 4, 0, "+Inputs %" PRIu32 " Ready %" PRIu32 "\n", num_inputs, num_ready); - for( uint32_t i = 0; i < total_num_inputs; ++i) { - if( input_queues_->at(i)->argument_ > -1 ) { - if( input_queues_->at(i)->data_queue_->size() > 0 ) { - argList[i].valid_ = 1; - argList[i].data_ = input_queues_->at(i)->data_queue_->front(); - input_queues_->at(i)->forwarded_ = 0; - input_queues_->at(i)->data_queue_->pop(); - } else { - argList[i].valid_ = 0; - } - } - } - } - - pending_op_ = 1; - - // STADDR: Takes a constant and a variable; constant is addr, variable is data - // STREAM_ST: Takes two constants and a variable; const0 is starting addr, const1 is number of stores, var is data - //create the memory request - if( op_binding_ == STADDR ) { - input_queues_->at(0)->data_queue_->push(LlyrData(argList[0].data_.to_ullong())); - doStore(argList[0].data_.to_ullong(), argList[1].data_.to_ullong()); - } else if( op_binding_ == STREAM_ST ) { - if( argList[2].valid_ == 1 ) { - if( argList[1].data_.to_ullong() >= 0 ) { - input_queues_->at(0)->data_queue_->push(LlyrData(argList[0].data_.to_ullong() + (Bit_Length / 8) )); - input_queues_->at(1)->data_queue_->push(LlyrData(argList[1].data_.to_ullong() - 1)); - doStore(argList[0].data_.to_ullong(), argList[2].data_.to_ullong()); - } - } - } else { - output_->fatal(CALL_INFO, -1, "Error: could not find corresponding op-%" PRIu32 ".\n", op_binding_); - exit(-1); - } - - if( output_->getVerboseLevel() >= 10 ) { - output_->verbose(CALL_INFO, 10, 0, "Queue Contents (1)\n"); - printInputQueue(); - printOutputQueue(); - } - - return true; - } - -private: - -};// AdvStoreProcessingElement - -}//SST -}//Llyr - -#endif // _STORE_PE_H diff --git a/src/sst/elements/llyr/tests/gemm.in b/src/sst/elements/llyr/tests/gemm.in deleted file mode 100644 index 1126fc4d7e..0000000000 --- a/src/sst/elements/llyr/tests/gemm.in +++ /dev/null @@ -1,56 +0,0 @@ -1 [pe_type=LD] -2 [pe_type=LD] -3 [pe_type=LD] -4 [pe_type=LD] -5 [pe_type=LD] -6 [pe_type=LD] -7 [pe_type=LD] -8 [pe_type=LD] -9 [pe_type=LD] -10 [pe_type=MUL] -11 [pe_type=MUL] -12 [pe_type=MUL] -13 [pe_type=MUL] -14 [pe_type=MUL] -15 [pe_type=MUL] -16 [pe_type=ADD] -17 [pe_type=ADD] -18 [pe_type=ADD] -19 [pe_type=ADD] -20 [pe_type=ST] -21 [pe_type=ST] - -1 -- 10 -1 -- 13 - -4 -- 10 - -2 -- 11 -2 -- 14 - -6 -- 11 - -3 -- 12 -3 -- 15 - -8 -- 12 - -5 -- 13 - -7 -- 14 - -9 -- 15 - -10 -- 16 -11 -- 16 -16 -- 17 -12 -- 17 - -13 -- 18 -14 -- 18 -18 -- 19 -15 -- 19 - -17 -- 20 -19 -- 21 - diff --git a/src/sst/elements/llyr/tests/graph_mesh_25.hdw b/src/sst/elements/llyr/tests/graph_mesh_25.hdw deleted file mode 100644 index bea105e245..0000000000 --- a/src/sst/elements/llyr/tests/graph_mesh_25.hdw +++ /dev/null @@ -1,107 +0,0 @@ -digraph "Hardware Description" { -0 [label=any] -1 [label=any] -2 [label=any] -3 [label=any] -4 [label=any] -5 [label=any] -6 [label=any] -7 [label=any] -8 [label=any] -9 [label=any] -10 [label=any] -11 [label=any] -12 [label=any] -13 [label=any] -14 [label=any] -15 [label=any] -16 [label=any] -17 [label=any] -18 [label=any] -19 [label=any] -20 [label=any] -21 [label=any] -22 [label=any] -23 [label=any] -24 [label=any] -0--1 -0--5 -1--0 -1--2 -1--6 -2--1 -2--3 -2--7 -3--2 -3--4 -3--8 -4--3 -4--9 -5--0 -5--6 -5--10 -6--1 -6--5 -6--7 -6--11 -7--2 -7--6 -7--8 -7--12 -8--3 -8--7 -8--9 -8--13 -9--4 -9--8 -9--14 -10--5 -10--11 -10--15 -11--6 -11--10 -11--12 -11--16 -12--7 -12--11 -12--13 -12--17 -13--8 -13--12 -13--14 -13--18 -14--9 -14--13 -14--19 -15--10 -15--16 -15--20 -16--11 -16--15 -16--17 -16--21 -17--12 -17--16 -17--18 -17--22 -18--13 -18--17 -18--19 -18--23 -19--14 -19--18 -19--24 -20--15 -20--21 -21--16 -21--20 -21--22 -22--17 -22--21 -22--23 -23--18 -23--22 -23--24 -24--19 -24--23 -} diff --git a/src/sst/elements/llyr/tests/int-1.mem b/src/sst/elements/llyr/tests/int-1.mem deleted file mode 100644 index c07122cd0b..0000000000 --- a/src/sst/elements/llyr/tests/int-1.mem +++ /dev/null @@ -1 +0,0 @@ -0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,401,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,499,500,501,502,503,504,505,506,507,508,509,510,511,512,513,514,515,516,517,518,519,520,521,522,523,524,525,526,527,528,529,530,531,532,533,534,535,536,537,538,539,540,541,542,543,544,545,546,547,548,549,550,551,552,553,554,555,556,557,558,559,560,561,562,563,564,565,566,567,568,569,570,571,572,573,574,575,576,577,578,579,580,581,582,583,584,585,586,587,588,589,590,591,592,593,594,595,596,597,598,599,600,601,602,603,604,605,606,607,608,609,610,611,612,613,614,615,616,617,618,619,620,621,622,623,624,625,626,627,628,629,630,631,632,633,634,635,636,637,638,639,640,641,642,643,644,645,646,647,648,649,650,651,652,653,654,655,656,657,658,659,660,661,662,663,664,665,666,667,668,669,670,671,672,673,674,675,676,677,678,679,680,681,682,683,684,685,686,687,688,689,690,691,692,693,694,695,696,697,698,699,700,701,702,703,704,705,706,707,708,709,710,711,712,713,714,715,716,717,718,719,720,721,722,723,724,725,726,727,728,729,730,731,732,733,734,735,736,737,738,739,740,741,742,743,744,745,746,747,748,749,750,751,752,753,754,755,756,757,758,759,760,761,762,763,764,765,766,767,768,769,770,771,772,773,774,775,776,777,778,779,780,781,782,783,784,785,786,787,788,789,790,791,792,793,794,795,796,797,798,799,800,801,802,803,804,805,806,807,808,809,810,811,812,813,814,815,816,817,818,819,820,821,822,823,824,825,826,827,828,829,830,831,832,833,834,835,836,837,838,839,840,841,842,843,844,845,846,847,848,849,850,851,852,853,854,855,856,857,858,859,860,861,862,863,864,865,866,867,868,869,870,871,872,873,874,875,876,877,878,879,880,881,882,883,884,885,886,887,888,889,890,891,892,893,894,895,896,897,898,899,900,901,902,903,904,905,906,907,908,909,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,930,931,932,933,934,935,936,937,938,939,940,941,942,943,944,945,946,947,948,949,950,951,952,953,954,955,956,957,958,959,960,961,962,963,964,965,966,967,968,969,970,971,972,973,974,975,976,977,978,979,980,981,982,983,984,985,986,987,988,989,990,991,992,993,994,995,996,997,998,999,1000,1001,1002,1003,1004,1005,1006,1007,1008,1009,1010,1011,1012,1013,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023,1024 diff --git a/src/sst/elements/llyr/tests/llyr_test.py b/src/sst/elements/llyr/tests/llyr_test.py deleted file mode 100644 index d964153950..0000000000 --- a/src/sst/elements/llyr/tests/llyr_test.py +++ /dev/null @@ -1,74 +0,0 @@ -# Automatically generated SST Python input -import sst - -# Define SST core options -sst.setProgramOption("timebase", "1 ps") -sst.setProgramOption("stopAtCycle", "10000s") - -# Constants shared across components -tile_clk_mhz = 1 -backing_size = 16384 -l1_size = 512 -verboseLevel = 0 -statLevel = 16 -mainDebug = 0 -otherDebug = 0 -debugLevel = 0 - -# Define the simulation components -df_0 = sst.Component("df_0", "llyr.LlyrDataflow") -df_0.addParams({ - "verbose" : str(verboseLevel), - "clock" : str(tile_clk_mhz) + "GHz", - "mem_init" : "int-1.mem", - "application" : "gemm.in", - "hardware_graph": "graph_mesh_25.hdw", - "mapper" : "llyr.mapper.simple" -}) -iface = df_0.setSubComponent("iface", "memHierarchy.standardInterface") - -df_l1cache = sst.Component("df_l1", "memHierarchy.Cache") -df_l1cache.addParams({ - "access_latency_cycles" : "2", - "cache_frequency" : str(tile_clk_mhz) + "GHz", - "replacement_policy" : "lru", - "coherence_protocol" : "MESI", - "cache_size" : str(l1_size) + "B", - "associativity" : "1", - "cache_line_size" : "16", - "verbose" : str(verboseLevel), - "debug" : str(otherDebug), - "debug_level" : str(debugLevel), - "L1" : "1" -}) - -df_memory = sst.Component("memory", "memHierarchy.MemController") -df_memory.addParams({ - "backing" : "mmap", - "verbose" : str(verboseLevel), - "debug" : str(otherDebug), - "debug_level" : str(debugLevel), - "addr_range_start" : "0", - "clock" : str(tile_clk_mhz) + "GHz", -}) - -backend = df_memory.setSubComponent("backend", "memHierarchy.simpleMem") -backend.addParams({ - "access_time" : "100 ns", - "mem_size" : str(backing_size) + "B", -}) - -# Enable SST Statistics Outputs for this simulation -sst.setStatisticLoadLevel(statLevel) -sst.enableAllStatisticsForAllComponents({"type":"sst.AccumulatorStatistic"}) -#sst.setStatisticOutput("sst.statOutputTXT", { "filepath" : "output.csv" }) - -# Define the simulation links -link_df_cache_link = sst.Link("link_cpu_cache_link") -link_df_cache_link.connect( (iface, "port", "1ps"), (df_l1cache, "high_network_0", "1ps") ) -link_df_cache_link.setNoCut() - -link_mem_bus_link = sst.Link("link_mem_bus_link") -link_mem_bus_link.connect( (df_l1cache, "low_network_0", "5ps"), (df_memory, "direct_link", "5ps") ) - - diff --git a/src/sst/elements/llyr/tests/refFiles/llyr_test.out b/src/sst/elements/llyr/tests/refFiles/llyr_test.out deleted file mode 100644 index 2fc046fdea..0000000000 --- a/src/sst/elements/llyr/tests/refFiles/llyr_test.out +++ /dev/null @@ -1,175 +0,0 @@ -memory, Notice: memory controller's region is larger than the backend's mem_size, controller is limiting accessible memory to mem_size -Region: start=0, end=18446744073709551615, interleaveStep=0, interleaveSize=0. MemSize: 16384B - df_l1.default_stat : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_GetS_I : Accumulator : Sum.u64 = 5; SumSQ.u64 = 5; Count.u64 = 5; Min.u64 = 1; Max.u64 = 1; - df_l1.stateEvent_GetS_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_GetS_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_GetX_I : Accumulator : Sum.u64 = 2; SumSQ.u64 = 2; Count.u64 = 2; Min.u64 = 1; Max.u64 = 1; - df_l1.stateEvent_GetX_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_GetX_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_GetSX_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_GetSX_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_GetSX_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_GetSResp_IS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_GetXResp_IS : Accumulator : Sum.u64 = 5; SumSQ.u64 = 5; Count.u64 = 5; Min.u64 = 1; Max.u64 = 1; - df_l1.stateEvent_GetXResp_IM : Accumulator : Sum.u64 = 2; SumSQ.u64 = 2; Count.u64 = 2; Min.u64 = 1; Max.u64 = 1; - df_l1.stateEvent_GetXResp_SM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_Inv_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_Inv_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_Inv_IS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_Inv_IM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_Inv_SM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_Inv_SB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_Inv_IB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FetchInvX_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FetchInvX_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FetchInvX_IS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FetchInvX_IM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FetchInvX_SB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FetchInvX_IB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_Fetch_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_Fetch_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_Fetch_IS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_Fetch_IM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_Fetch_SM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_Fetch_IB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_Fetch_SB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FetchInv_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FetchInv_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FetchInv_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FetchInv_IS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FetchInv_IM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FetchInv_SM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FetchInv_SB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FetchInv_IB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_ForceInv_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_ForceInv_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_ForceInv_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_ForceInv_IS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_ForceInv_IM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_ForceInv_SM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_ForceInv_SB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_ForceInv_IB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FlushLine_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FlushLine_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FlushLine_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FlushLineInv_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FlushLineInv_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FlushLineInv_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FlushLineResp_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FlushLineResp_IB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FlushLineResp_SB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.eventSent_GetS : Accumulator : Sum.u64 = 5; SumSQ.u64 = 5; Count.u64 = 5; Min.u64 = 1; Max.u64 = 1; - df_l1.eventSent_GetX : Accumulator : Sum.u64 = 2; SumSQ.u64 = 2; Count.u64 = 2; Min.u64 = 1; Max.u64 = 1; - df_l1.eventSent_GetSX : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.eventSent_Write : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.eventSent_PutM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.eventSent_NACK : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.eventSent_FlushLine : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.eventSent_FlushLineInv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.eventSent_FetchResp : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.eventSent_FetchXResp : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.eventSent_AckInv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.eventSent_GetSResp : Accumulator : Sum.u64 = 9; SumSQ.u64 = 9; Count.u64 = 9; Min.u64 = 1; Max.u64 = 1; - df_l1.eventSent_GetXResp : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.eventSent_WriteResp : Accumulator : Sum.u64 = 2; SumSQ.u64 = 2; Count.u64 = 2; Min.u64 = 1; Max.u64 = 1; - df_l1.eventSent_FlushLineResp : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.eventSent_Put : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.eventSent_Get : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.eventSent_AckMove : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.eventSent_CustomReq : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.eventSent_CustomResp : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.eventSent_CustomAck : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.EventStalledForLockedCacheline : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.evict_I : Accumulator : Sum.u64 = 6; SumSQ.u64 = 6; Count.u64 = 6; Min.u64 = 1; Max.u64 = 1; - df_l1.evict_S : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.evict_M : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.evict_IS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.evict_IM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.evict_SM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.evict_SB : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.latency_GetS_hit : Accumulator : Sum.u64 = 438; SumSQ.u64 = 47966; Count.u64 = 4; Min.u64 = 108; Max.u64 = 111; - df_l1.latency_GetS_miss : Accumulator : Sum.u64 = 545; SumSQ.u64 = 59415; Count.u64 = 5; Min.u64 = 107; Max.u64 = 111; - df_l1.latency_GetX_hit : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.latency_GetX_miss : Accumulator : Sum.u64 = 215; SumSQ.u64 = 23113; Count.u64 = 2; Min.u64 = 107; Max.u64 = 108; - df_l1.latency_GetX_upgrade : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.latency_GetSX_hit : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.latency_GetSX_miss : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.latency_GetSX_upgrade : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.latency_FlushLine : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.latency_FlushLine_fail : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.latency_FlushLineInv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.latency_FlushLineInv_fail : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.GetSHit_Arrival : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.GetXHit_Arrival : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.GetSXHit_Arrival : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.GetSHit_Blocked : Accumulator : Sum.u64 = 4; SumSQ.u64 = 4; Count.u64 = 4; Min.u64 = 1; Max.u64 = 1; - df_l1.GetXHit_Blocked : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.GetSXHit_Blocked : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.GetSMiss_Arrival : Accumulator : Sum.u64 = 5; SumSQ.u64 = 5; Count.u64 = 5; Min.u64 = 1; Max.u64 = 1; - df_l1.GetXMiss_Arrival : Accumulator : Sum.u64 = 2; SumSQ.u64 = 2; Count.u64 = 2; Min.u64 = 1; Max.u64 = 1; - df_l1.GetSXMiss_Arrival : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.GetSMiss_Blocked : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.GetXMiss_Blocked : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.GetSXMiss_Blocked : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.CacheHits : Accumulator : Sum.u64 = 4; SumSQ.u64 = 4; Count.u64 = 4; Min.u64 = 1; Max.u64 = 1; - df_l1.CacheMisses : Accumulator : Sum.u64 = 7; SumSQ.u64 = 7; Count.u64 = 7; Min.u64 = 1; Max.u64 = 1; - df_l1.stateEvent_AckPut_I : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.eventSent_PutS : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.eventSent_PutE : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_GetS_E : Accumulator : Sum.u64 = 4; SumSQ.u64 = 4; Count.u64 = 4; Min.u64 = 1; Max.u64 = 1; - df_l1.stateEvent_GetX_E : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_GetSX_E : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FlushLine_E : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FlushLineInv_E : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FetchInv_E : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_ForceInv_E : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.stateEvent_FetchInvX_E : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.evict_E : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.TotalEventsReceived : Accumulator : Sum.u64 = 18; SumSQ.u64 = 18; Count.u64 = 18; Min.u64 = 1; Max.u64 = 1; - df_l1.TotalEventsReplayed : Accumulator : Sum.u64 = 4; SumSQ.u64 = 4; Count.u64 = 4; Min.u64 = 1; Max.u64 = 1; - df_l1.Put_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.Get_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.AckMove_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.GetS_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.Write_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.GetSX_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.GetSResp_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.WriteResp_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.CustomReq_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.CustomResp_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.CustomAck_uncache_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.NULLCMD_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.GetS_recv : Accumulator : Sum.u64 = 9; SumSQ.u64 = 9; Count.u64 = 9; Min.u64 = 1; Max.u64 = 1; - df_l1.GetX_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.GetSX_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.Write_recv : Accumulator : Sum.u64 = 2; SumSQ.u64 = 2; Count.u64 = 2; Min.u64 = 1; Max.u64 = 1; - df_l1.FlushLine_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.FlushLineInv_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.GetSResp_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.GetXResp_recv : Accumulator : Sum.u64 = 7; SumSQ.u64 = 7; Count.u64 = 7; Min.u64 = 1; Max.u64 = 1; - df_l1.FlushLineResp_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.Inv_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.ForceInv_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.Fetch_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.FetchInv_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.FetchInvX_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.NACK_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.AckPut_recv : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - df_l1.MSHR_occupancy : Accumulator : Sum.u64 = 1161; SumSQ.u64 = 8905; Count.u64 = 224; Min.u64 = 0; Max.u64 = 9; - df_l1.Bank_conflicts : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - memory.requests_received_GetS : Accumulator : Sum.u64 = 5; SumSQ.u64 = 5; Count.u64 = 5; Min.u64 = 1; Max.u64 = 1; - memory.requests_received_GetSX : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - memory.requests_received_GetX : Accumulator : Sum.u64 = 2; SumSQ.u64 = 2; Count.u64 = 2; Min.u64 = 1; Max.u64 = 1; - memory.requests_received_Write : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - memory.requests_received_PutM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - memory.outstanding_requests : Accumulator : Sum.u64 = 718; SumSQ.u64 = 2960; Count.u64 = 224; Min.u64 = 0; Max.u64 = 5; - memory.latency_GetS : Accumulator : Sum.u64 = 515; SumSQ.u64 = 53055; Count.u64 = 5; Min.u64 = 101; Max.u64 = 105; - memory.latency_GetSX : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - memory.latency_GetX : Accumulator : Sum.u64 = 203; SumSQ.u64 = 20605; Count.u64 = 2; Min.u64 = 101; Max.u64 = 102; - memory.latency_Write : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - memory.latency_PutM : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - memory.cycles_with_issue : Accumulator : Sum.u64 = 7; SumSQ.u64 = 7; Count.u64 = 7; Min.u64 = 1; Max.u64 = 1; - memory.cycles_attempted_issue_but_rejected : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - memory.total_cycles : Accumulator : Sum.u64 = 224; SumSQ.u64 = 50176; Count.u64 = 1; Min.u64 = 224; Max.u64 = 224; -Simulation is complete, simulated time: 224 ns diff --git a/src/sst/elements/llyr/tests/testsuite_default_llyr.py b/src/sst/elements/llyr/tests/testsuite_default_llyr.py deleted file mode 100644 index 871960bedc..0000000000 --- a/src/sst/elements/llyr/tests/testsuite_default_llyr.py +++ /dev/null @@ -1,78 +0,0 @@ -# -*- coding: utf-8 -*- - -from sst_unittest import * -from sst_unittest_support import * - - -class testcase_llyr_Component(SSTTestCase): - - def setUp(self): - super(type(self), self).setUp() - # Put test based setup code here. it is called once before every test - - def tearDown(self): - # Put test based teardown code here. it is called once after every test - super(type(self), self).tearDown() - -##### - - @unittest.skipIf(testing_check_get_num_ranks() > 1, "llyr: test_llyr_singlestream skipped if ranks > 1") - @unittest.skipIf(testing_check_get_num_threads() > 1, "llyr: test_llyr_singlestream skipped if threads > 1") - def test_llyr_singlestream(self): - self.llyr_test_template("llyr_test") - -##### - - def llyr_test_template(self, testcase, testtimeout=240): - # Get the path to the test files - test_path = self.get_testsuite_dir() - outdir = self.get_test_output_run_dir() - tmpdir = self.get_test_output_tmp_dir() - - # Set the various file paths - testDataFileName="test_llyr_{0}".format(testcase) - - sdlfile = "{0}/{1}.py".format(test_path, testcase) - reffile = "{0}/refFiles/{1}.out".format(test_path, testDataFileName) - outfile = "{0}/{1}.out".format(outdir, testDataFileName) - errfile = "{0}/{1}.err".format(outdir, testDataFileName) - mpioutfiles = "{0}/{1}.testfile".format(outdir, testDataFileName) - - self.run_sst(sdlfile, outfile, errfile, mpi_out_files=mpioutfiles, timeout_sec=testtimeout) - - testing_remove_component_warning_from_file(outfile) - - # NOTE: THE PASS / FAIL EVALUATIONS ARE PORTED FROM THE SQE BAMBOO - # BASED testSuite_XXX.sh THESE SHOULD BE RE-EVALUATED BY THE - # DEVELOPER AGAINST THE LATEST VERSION OF SST TO SEE IF THE - # TESTS & RESULT FILES ARE STILL VALID - - ### Check for success ### - - # Lines to ignore during diff - ## This is generated by SST when the number of ranks/threads > # of components - ignore_lines = ["WARNING: No components are assigned to"] - ## These are warnings/info generated by SST/memH in debug mode - ignore_lines.append("Notice: memory controller's region is larger than the backend's mem_size") - ignore_lines.append("Region: start=") - - filesAreTheSame, statDiffs, othDiffs = testing_stat_output_diff(outfile, reffile, ignore_lines, {}, True) - - # Perform the tests - if filesAreTheSame: - log_debug(" -- Output file {0} passed check against the Reference File {1}".format(outfile, reffile)) - elif use_pin_traces: ## PIN traces are generated dynamically and may diff, but the line count should match - # Use processed diffs so that ignore lines are still ignored - stat_lc = sum(1 for x in statDiffs if x[0] == "<") - oth_lc = sum(1 for x in othDiffs if x[0] == "<") - if stat_lc*2 == len(statDiffs) and oth_lc*2 == len(othDiffs): - log_debug(" -- Output file {0} pass line count check against the Reference File {1}".format(outfile, reffile)) - else: - diffdata = self._prettyPrintDiffs(statDiffs, othDiffs) - log_failure(diffdata) - self.assertTrue(filesAreTheSame, "Output file {0} does not pass line count check against the Reference File {1} ".format(outfile, reffile)) - - else: - diffdata = self._prettyPrintDiffs(statDiffs, othDiffs) - log_failure(diffdata) - self.assertTrue(filesAreTheSame, "Output file {0} does not pass check against the Reference File {1} ".format(outfile, reffile)) diff --git a/src/sst/elements/llyr/tools/gemm_gen.py b/src/sst/elements/llyr/tools/gemm_gen.py deleted file mode 100755 index 176342d1be..0000000000 --- a/src/sst/elements/llyr/tools/gemm_gen.py +++ /dev/null @@ -1,245 +0,0 @@ -#!/usr/bin/python3 - -import itertools -from collections import defaultdict - -debug = 0 -data_type = 0 #0 int, 1 fp64 - -m = 1 -k = 3 -n = 2 - -#m = 2 -#k = 1 -#n = 1 - -#m = 2 -#k = 2 -#n = 2 - -#m = 4 -#k = 2 -#n = 4 - -#m = 8 -#k = 4 -#n = 8 - -#m = 4 -#k = 4 -#n = 4 - -#m = 6 -#k = 3 -#n = 10 - -#m = 12 -#k = 12 -#n = 12 - -#m = 24 -#k = 24 -#n = 24 - -#m = 128 -#k = 128 -#n = 128 - -#m = 60 -#k = 30 -#n = 100 - -num_load = (m * k) + (k * n) -num_mul = k * m * n -num_add = (k - 1) * (m * n) -num_store = m * n - -pe_string_pre = "pe_type=" -edge_string_pre = "graphOut.addEdge" - -## -def extract_groups( grouping ): - temp_list = list(itertools.zip_longest(*(iter(grouping),) * 2, fillvalue=-1)) - return temp_list - -if( debug == 1 ): - print("Num load: " + str(num_load) + " 1-" + str(num_load)) - print("Num mult: " + str(num_mul) + " " + str(1 + num_load) + "-" + str(num_load+num_mul)) - print("Num add: " + str(num_add) + " " + str(1 + num_mul + num_load) + "-" + str(num_load+num_mul+num_add)) - print("Num store: " + str(num_store) + " " + str(1 + num_add + num_mul + num_load) + "-" + str(num_add+num_load+num_mul+num_store)) - print("\n") - - -# open the file -file = open("gemm.in", "w") - -pe_num = 1 -def write_pe( pe_string, pe, num_pes ): - file.write("%s [%s%s]\n" % (pe_num, pe_string_pre, pe)) - -# write loads -for counter in range( 0, num_load ): - write_pe( "LoadProcessingElement", "LD", pe_num ) - pe_num = pe_num + 1 - -# write mul -mul_start = pe_num -for counter in range( 0, num_mul ): - if( data_type == 1 ): - write_pe( "IntProcessingElement", "FMUL", pe_num ) - else: - write_pe( "IntProcessingElement", "MUL", pe_num ) - pe_num = pe_num + 1 - -# write add -add_start = pe_num -for counter in range( 0, num_add ): - if( data_type == 1 ): - write_pe( "IntProcessingElement", "FADD", pe_num ) - else: - write_pe( "IntProcessingElement", "ADD", pe_num ) - pe_num = pe_num + 1 - -# write store -store_start = pe_num -for counter in range( 0, num_store ): - write_pe( "StoreProcessingElement", "ST", pe_num ) - pe_num = pe_num + 1 - -file.write("\n") - -if( debug == 1 ): - print(" %s %s %s" % (mul_start, add_start, store_start)) - -# write edges -# connect loads to muls (first load is PE-1) -a_start = 1 -a_end = (k * m) -a_next = a_start -b_start = a_end + 1 -b_end = b_start + ((k * n) - 1) -b_next = b_start -if( debug == 1 ): - print("LD-MUL A:%s-%s B:%s-%s\n" % (a_start, a_end, b_start, b_end)) - file.write("LD-MUL A:%s-%s B:%s-%s\n" % (a_start, a_end, b_start, b_end)) - -mul_pe_dict = defaultdict(list) -add_pe_list = [] -final_add_pe_list = [] -new_mul = mul_start -for x in range( 0, m ): - b_offset = 0 - a_offset = a_start + (x * k) - mul_pe = new_mul - for y in range( 0, n ): - temp_list = [] - b_next = b_start + b_offset - #file.write("PE Start " + str(mul_pe) + " B-" + str(b_next) + "\n") - for z in range( 0, k ): - a_next = a_offset + z - mul_pe_dict[a_next].append(mul_pe) - mul_pe_dict[b_next].append(mul_pe) - temp_list.append(mul_pe) - #file.write("%s -- %s\n" % (a_next, b_next)) - b_next = b_next + n - mul_pe = mul_pe + 1 - #for key in mul_pe_dict.keys(): - #val = mul_pe_dict[key] - #print("Key", key, 'points to', val) - #print("\n") - #file.write("\n") - add_pe_list.append(temp_list) - b_offset = b_offset + 1 - new_mul = mul_pe - -if( debug == 1 ): - for val in add_pe_list: - print(val) - print("\n") - -for key in mul_pe_dict.keys(): - value = mul_pe_dict[key] - for val in value: - file.write("%s -- %s\n" % (key, val)) - file.write("\n") - -# connect adds to muls -test = [] -if( debug == 1 ): - file.write("MUL-ADD A:%s-%s B:%s-%s\n" % (a_start, a_end, b_start, b_end)) - -def add_tree( boop, next_add ): - global add_start - add_pe_groups = [] - if( len(boop) > 1 ): - if( debug == 1 ): - print("Here") - print(boop) - for x in boop: - if( x.count(-1) == 0 ): - file.write("%s -- %s\n" % (x[0], next_add)) - file.write("%s -- %s\n" % (x[1], next_add)) - add_pe_groups.append(next_add) - next_add = next_add + 1 - if( debug == 1 ): - print(x) - else: - add_pe_groups.append(x[0]) - if( debug == 1 ): - print(x) - - add_start = next_add - if( debug == 1 ): - print(add_pe_groups) - test = extract_groups(add_pe_groups) - if( debug == 1 ): - print(test) - add_tree(test, add_start) - else: - if( debug == 1 ): - print("Here 2") - print(boop) - for x in boop: - if( x.count(-1) == 0 ): - file.write("%s -- %s\n" % (x[0], next_add)) - file.write("%s -- %s\n" % (x[1], next_add)) - add_pe_groups.append(next_add) - final_add_pe_list.append(next_add) - next_add = next_add + 1 - if( debug == 1 ): - print(x) - else: - final_add_pe_list.append(x[0]) - if( debug == 1 ): - print(x) - - add_start = next_add - if( debug == 1 ): - print(add_pe_groups) - print(final_add_pe_list) - -for value in add_pe_list: - test = list(itertools.zip_longest(*(iter(value),) * 2, fillvalue=-1)) - if( debug == 1 ): - print("ADD") - print(test) - add_tree(test, add_start) - file.write("\n") - if( debug == 1 ): - print("\n") - -## Connect stores -next_store = store_start -for value in final_add_pe_list: - if( debug == 1 ): - print("ST") - print(value) - file.write("%s -- %s\n" % (value, next_store)) - next_store = next_store + 1 - -file.write("\n") - - -# cleanup -file.close() diff --git a/src/sst/elements/llyr/tools/graphGen.py b/src/sst/elements/llyr/tools/graphGen.py deleted file mode 100755 index 0795606aad..0000000000 --- a/src/sst/elements/llyr/tools/graphGen.py +++ /dev/null @@ -1,128 +0,0 @@ -#!/usr/bin/env python3 -import numpy as np -import networkx as nx -import matplotlib.pyplot as plt -from collections import defaultdict - -# mesh, torus, all -output_type = "all" - -# node operation -row_ops = 0 -col_ops = 0 -edge_ops = 0 - -G = nx.DiGraph() - -#for rows in (2, 4, 8, 16, 32, 64): -for rows in (2, 4, 8, 16, 32): - cols = rows - num_nodes = rows * cols - graph_topology = np.arange(num_nodes).reshape(rows, cols) - - fileName = "graph_" + output_type + "_" + str(num_nodes) + ".dot" - - for i in range(0, num_nodes): - G.add_node(i) - - if( output_type == "mesh" ): - node_num = 0 - for i in range(0, rows): - for j in range(0, cols): - num_neighbors = 0 - for i_x in range(max(0,i-1), min(cols,i+2)): - for j_y in range(max(0,j-1), min(rows,j+2)): - if( i == i_x and j == j_y ): - continue - elif( i < i_x or i > i_x ): - if( j < j_y or j > j_y ): - continue - num_neighbors = num_neighbors + 1 - - for i_x in range(max(0,i-1), min(cols,i+2)): - for j_y in range(max(0,j-1), min(rows,j+2)): - if( i == i_x and j == j_y ): - continue - elif( i < i_x or i > i_x ): - if( j < j_y or j > j_y ): - continue - G.add_edge(node_num, graph_topology[i_x][j_y]) - - node_num = node_num + 1 - - elif( output_type == "all" ): - node_list = {new_list: [] for new_list in range(num_nodes)} - for node in node_list: - node_num = 0 - for i in range(0, rows): - for j in range(0, cols): - if( str(node_num) != str(node) ): - node_list[node].append(node_num) - node_num = node_num + 1 - - for node in node_list: - for neighbor in node_list[node]: - G.add_edge(node, neighbor) - - else: - node_num = 0 - for i in range(0, rows): - for j in range(0, cols): - num_neighbors = 0 - for i_x in range(max(0,i-1), min(cols,i+2)): - for j_y in range(max(0,j-1), min(rows,j+2)): - if( i == i_x and j == j_y ): - continue - num_neighbors = num_neighbors + 1 - - for i_x in range(max(0,i-1), min(cols,i+2)): - for j_y in range(max(0,j-1), min(rows,j+2)): - if( i == i_x and j == j_y ): - continue - G.add_edge(node_num, graph_topology[i_x][j_y]) - node_num = node_num + 1 - - ## All nodes can do this op - if( row_ops == 0 and col_ops == 0 and edge_ops == 0 ): - for node in G.nodes(): - G.nodes[node]["op"] = "any" - else: - for node in G.nodes(): - G.nodes[node]["op"] = "arithmetic" - - ## First and last row have different op - if( row_ops == 1 ): - node = 0 - for i in range(0, rows): - for j in range(0, cols): - if( i == 0 or i == rows - 1 ): - G.nodes[node]["op"] = "memory" - node = node + 1 - - ## First and last column have different op - if( col_ops == 1 ): - node = 0 - for i in range(0, rows): - for j in range(0, cols): - if( j == 0 or j == cols - 1 ): - G.nodes[node]["op"] = "memory" - node = node + 1 - - ## Edges have different op - if( edge_ops == 1 ): - node = 0 - for i in range(0, rows): - for j in range(0, cols): - if( j == 0 or j == cols - 1 or i == 0 or i == rows - 1 ): - G.nodes[node]["op"] = "memory" - node = node + 1 - - nx.nx_pydot.write_dot(G, fileName) - - nx.nx_pydot.graphviz_layout(G, prog="neato") # prog options: neato, dot, fdp, sfdp, twopi, circo - nx.draw(G, pos = nx.nx_pydot.graphviz_layout(G), - node_size=1200, node_color='lightblue', linewidths=0.25, - font_size=10, font_weight='bold', with_labels=True) - #plt.show() - - diff --git a/src/sst/elements/llyr/tools/graph_gen.py b/src/sst/elements/llyr/tools/graph_gen.py deleted file mode 100755 index 645337df37..0000000000 --- a/src/sst/elements/llyr/tools/graph_gen.py +++ /dev/null @@ -1,90 +0,0 @@ -#!/usr/bin/python - -#import graphviz - -total_pes = 8 -pe_num = 0 - -#g = graphviz.Graph("Hardware Description", filename='hardware.cfg') - -### create nodes -#for counter in range( 0, total_pes ): - #g.node( str(counter), 'any') - -### create edges for fully conntected graph -#for x in range( 0, total_pes ): - #for y in range( x + 1, total_pes ): - #g.edge( str(x), str(y)) - -##g.view() - -import os -import networkx as nx -from networkx.algorithms import isomorphism -from networkx.drawing.nx_pydot import write_dot -from networkx.algorithms import isomorphism as iso - -hw_graph = nx.DiGraph() -sw_graph = nx.DiGraph() - -### create nodes -for counter in range( 0, total_pes ): - hw_graph.add_node( counter, label="any") - -## create edges for fully conntected graph -for x in range( 0, total_pes ): - for y in range( x + 1, total_pes ): - hw_graph.add_edge( x, y ) - -## app graphFile -sw_graph.add_node( 1, label="any") -sw_graph.add_node( 2, label="any") -sw_graph.add_node( 3, label="any") -sw_graph.add_node( 4, label="any") -sw_graph.add_node( 5, label="any") -sw_graph.add_node( 6, label="any") -sw_graph.add_node( 7, label="any") -sw_graph.add_node( 8, label="any") - -sw_graph.add_edge( 1, 4 ) -sw_graph.add_edge( 2, 4 ) -#sw_graph.add_edge( 2, 5 ) -#sw_graph.add_edge( 3, 5 ) -#sw_graph.add_edge( 4, 6 ) -#sw_graph.add_edge( 5, 7 ) - - -# Nodes 'a', 'b', 'c' and 'd' form a column. -# Nodes 'g', 'h', 'i' and 'j' form a column. -g1edges = [['a', 'g'], ['a', 'h'], ['a', 'i'], - ['b', 'g'], ['b', 'h'], ['b', 'j'], - ['c', 'g'], ['c', 'i'], ['c', 'j'], - ['d', 'h'], ['d', 'i'], ['d', 'j']] - -# Nodes 1,2,3,4 form the clockwise corners of a large square. -# Nodes 5,6,7,8 form the clockwise corners of a small square -g2edges = [[1, 2], [2, 3], [3, 4], [4, 1], - [5, 6], [6, 7], [7, 8], [8, 5], - [1, 5], [2, 6], [3, 7], [4, 8]] - -g1 = nx.Graph() -g2 = nx.Graph() -g1.add_edges_from(g1edges) -g2.add_edges_from(g2edges) -#g3 = g2.subgraph([1, 2, 3, 4]) -#gm = iso.GraphMatcher(g1, g2) - -gm = iso.DiGraphMatcher(hw_graph, sw_graph) -for subgraph in gm.subgraph_isomorphisms_iter(): - print( subgraph ) - -gm = iso.DiGraphMatcher(sw_graph, hw_graph) -for subgraph in gm.subgraph_isomorphisms_iter(): - print( subgraph ) - -nx.drawing.nx_pydot.write_dot(sw_graph, '/tmp/sw.dot') -nx.drawing.nx_pydot.write_dot(hw_graph, '/tmp/hw.dot') - -os.system("dot -Tpdf /tmp/sw.dot -o /tmp/sw_test.pdf") -os.system("dot -Tpdf /tmp/hw.dot -o /tmp/hw_test.pdf") - diff --git a/src/sst/elements/llyr/tools/pyMapper.py b/src/sst/elements/llyr/tools/pyMapper.py deleted file mode 100644 index f957fd2bb1..0000000000 --- a/src/sst/elements/llyr/tools/pyMapper.py +++ /dev/null @@ -1,7 +0,0 @@ -import os -dir_path = os.path.dirname(os.path.realpath(__file__)) - -print('External Python program running...') -print('Hello World from Python program') -print(dir_path) - diff --git a/src/sst/elements/llyr/tools/spmm_gen.py b/src/sst/elements/llyr/tools/spmm_gen.py deleted file mode 100755 index 968bfd3a90..0000000000 --- a/src/sst/elements/llyr/tools/spmm_gen.py +++ /dev/null @@ -1,301 +0,0 @@ -#!/usr/bin/python3 - -import itertools -from collections import defaultdict - -debug = 0 -data_type = 0 #0 int, 1 fp64 - -## mxn * nxk -#m = 1 -#n = 3 -#k = 2 - -m = 3 -n = 3 -k = 2 - -#m = 3 -#n = 3 -#k = 3 - -#m = 12 -#n = 12 -#k = 12 - -#m = 16 -#n = 16 -#k = 16 - -## init non-zeroes -#non_zeroes = [1] -non_zeroes = [1,2,3,4,5] - -## init row_ptr -#row_ptr = [0,1] -row_ptr = [0,1,3,5] -#row_ptr = [0,1,4,5] - -## init col_ptr -#col_ptr = [0,1] -col_ptr = [0,0,1,1,2] -#col_ptr = [0,0,1,2,2] - -## init dense mat -dense_mat = [] -for i in range( n ): - x = [] - for j in range( k ): - x.append(0) - dense_mat.append(x) - -dense_mat[0][0] = 6 -dense_mat[0][1] = 7 -dense_mat[1][0] = 8 -dense_mat[1][1] = 9 -dense_mat[2][0] = 10 -dense_mat[2][1] = 11 - -#dense_mat[0][2] = 12 -#dense_mat[1][2] = 13 -#dense_mat[2][2] = 14 - - -## write memory file -mem_file = open("spmm-mem.in", "w") - -# write k -mem_file.write(str(k) + ",") - -#write v -converted_list = [str(element) for element in non_zeroes] -mem_file.write(",".join(converted_list)) -mem_file.write(",") - -#write row_ptr -converted_list = [str(element) for element in row_ptr] -mem_file.write(",".join(converted_list)) -mem_file.write(",") - -#write col_ptr -converted_list = [str(element) for element in col_ptr] -mem_file.write(",".join(converted_list)) -mem_file.write(",") - -for i in dense_mat: - converted_list = [str(element) for element in i] - mem_file.write(",".join(converted_list)) - mem_file.write(",") - -#fill memory -for i in range(255): - mem_file.write(str(255)) - if( i != 255 ): - mem_file.write(",") - -mem_file.close() - -##################################### -pe_string_pre = " [pe_type=" -adj_list = defaultdict(list) - -result = [] -for i in range( m ): - x = [] - for j in range( k ): - x.append(0) - result.append(x) - -# open the file -file = open("spmm.in", "w") - -#for x in result: - #converted_list = [str(element) for element in x] - #print(",".join(converted_list)) - -v_start_addr = 8 -r_start_addr = v_start_addr + (8 * len(non_zeroes)) -c_start_addr = r_start_addr + (8 * len(row_ptr)) -d_start_addr = c_start_addr + (8 * len(col_ptr)) -store_start_addr = 256 - -print("size of non_zeroes = " + str(len(non_zeroes)) + " " + str(v_start_addr)) -print("size of row_ptr = " + str(len(row_ptr)) + " " + str(r_start_addr)) -print("size of col_ptr = " + str(len(col_ptr)) + " " + str(c_start_addr)) -print("size of dense = " + str(len(dense_mat)) + " " + str(d_start_addr)) - -# write ld for k -adj_list[1].append(2) -file.write( str(1) + pe_string_pre + "LDADDR,0" + "]" + "\n" ) -file.write( str(2) + pe_string_pre + "MULCONST,8" + "]" + "\n" ) -file.write( "\n" ) - -pe_id = 3 -#for x in range( 0, m ): -for x in range( 0, len(row_ptr) - 1 ): - sp_ld_dict = defaultdict(int) - col_ld_dict = defaultdict(int) - partial_product = defaultdict(list) - - print(x) - for y in range( row_ptr[x], row_ptr[x+1] ): - ## LD sparse[j] - temp_addr = v_start_addr + (8 * y) - file.write( str(pe_id) + pe_string_pre + "LDADDR," + str(temp_addr) + "]" + "\n" ) - sp_ld_dict[y] = pe_id - pe_id = pe_id + 1 - - ## LD column[j] - temp_addr = c_start_addr + (8 * y) - file.write( str(pe_id) + pe_string_pre + "LDADDR," + str(temp_addr) + "]" + "\n" ) - col_ld_dict[y]= pe_id - pe_id = pe_id + 1 - - for z in range( 0, k ): - #print( str(x) + " " + str(y) + " " + str(z) ) - #print( str(result[x][z]) + " + " + str (non_zeroes[y]) + " * " + str(dense_mat[col_ptr[y]][z]) ) - result[x][z] = result[x][z] + non_zeroes[y] * dense_mat[col_ptr[y]][z] - - # calculate col in d[][] - mul_1 = pe_id - temp_pe = col_ld_dict[y] - adj_list[temp_pe].append(pe_id) - adj_list[2].append(pe_id) - file.write(str(pe_id) + pe_string_pre + "MUL" + "]" + "\n") - pe_id = pe_id + 1 - - # calculate row in d[][] - offset_pe = pe_id - adj_list[mul_1].append(pe_id) - offset = d_start_addr + (z * 8) - file.write(str(pe_id) + pe_string_pre + "ADDCONST," + str(offset) + "]" + "\n") - pe_id = pe_id + 1 - - # sparse[j] * dense[] - mul_2 = pe_id - temp_pe = sp_ld_dict[y] - adj_list[temp_pe].append(pe_id) - partial_product[z].append(mul_2) - file.write(str(pe_id) + pe_string_pre + "MUL" + "]" + "\n") - pe_id = pe_id + 1 - - # LD dense[col[j]][k] - ld_dense = pe_id - adj_list[offset_pe].append(pe_id) - adj_list[pe_id].append(mul_2) - file.write(str(pe_id) + pe_string_pre + "LD" + "]" + "\n") - pe_id = pe_id + 1 - - ## ST final value - #final_st[z].append(pe_id) - #file.write(str(pe_id) + pe_string_pre + "STADDR," + str(store_start_addr) + "]" + "\n") - #store_start_addr = store_start_addr + 8 - #pe_id = pe_id + 1 - - file.write( "\n" ) - - #for key in sp_ld_dict.keys(): - #val = sp_ld_dict[key] - #print("SP Key", key, 'points to', val) - - #for key in col_ld_dict.keys(): - #val = col_ld_dict[key] - #print("COL Key", key, 'points to', val) - - for key in partial_product.keys(): - val = partial_product[key] - #print("PP Key", key, 'points to', val) - - # match the accumulators - if( len(val) > 1 ): - acc_list = [] - #print("Moo! Cows! " + str(len(val))) - while( len(val) > 1 ): - #print("A " + str(len(val))) - adj_list[val.pop()].append(pe_id) - adj_list[val.pop()].append(pe_id) - acc_list.append(pe_id) - file.write(str(pe_id) + pe_string_pre + "ADD" + "]" + "\n") - pe_id = pe_id + 1 - if( len(val) > 0 ): - #print("B " + str(len(val))) - adj_list[val.pop()].append(pe_id) - acc_list.append(pe_id) - file.write(str(pe_id) + pe_string_pre + "ADD" + "]" + "\n") - pe_id = pe_id + 1 - # adder tree - if( len(acc_list) > 1 ): - #print("Boo! Who? " + str(len(acc_list))) - while( len(acc_list) > 1 ): - #print("C " + str(len(acc_list))) - # check if the adder has a single input - num_found = 0 - for key in adj_list.keys(): - top = adj_list[key] - if( acc_list[-1] in top ): - num_found = num_found + 1 - #print("Found " + str(top)) - - # if the tail has a single input, it's the bottom of the tree because muls are grouped above - if( num_found == 1 ): - adj_list[acc_list[-2]].append(acc_list[-1]) - - # otherwise we need another adder - else: - adj_list[acc_list[-1]].append(pe_id) - adj_list[acc_list[-2]].append(pe_id) - file.write(str(pe_id) + pe_string_pre + "ADD" + "]" + "\n") - pe_id = pe_id + 1 - - #print(str(acc_list.pop())) - #print(str(acc_list.pop())) - acc_list.pop() - acc_list.pop() - - if( len(acc_list) > 0 ): - #print("D " + str(len(acc_list))) - adj_list[pe_id - 1].append(pe_id) - file.write(str(pe_id) + pe_string_pre + "ADD" + "]" + "\n") - pe_id = pe_id + 1 - print(str(acc_list.pop())) - - # ST final value - adj_list[pe_id - 1].append(pe_id) - file.write(str(pe_id) + pe_string_pre + "STADDR," + str(store_start_addr) + "]" + "\n") - store_start_addr = store_start_addr + 8 - pe_id = pe_id + 1 - - else: - #print("Boo! " + str(acc_list[0])) - - # ST final value - file.write(str(pe_id) + pe_string_pre + "STADDR," + str(store_start_addr) + "]" + "\n") - store_start_addr = store_start_addr + 8 - pe_id = pe_id + 1 - - else: - #print("Narf! " + str(partial_product[key][0])) - # ST final value - adj_list[partial_product[key][0]].append(pe_id) - file.write(str(pe_id) + pe_string_pre + "STADDR," + str(store_start_addr) + "]" + "\n") - store_start_addr = store_start_addr + 8 - pe_id = pe_id + 1 - - #print( "\n" ) - -for key in adj_list.keys(): - val = adj_list[key] - #print("ADJ Key", key, 'points to', val) - for i in val: - file.write(str(key) + " -- " + str(i) + "\n") - file.write("\n") - -for x in result: - converted_list = [str(element) for element in x] - print(",".join(converted_list)) - - - -# cleanup -file.close() - diff --git a/src/sst/elements/llyr/tools/vf3_to_hwr.py b/src/sst/elements/llyr/tools/vf3_to_hwr.py deleted file mode 100755 index 1785810531..0000000000 --- a/src/sst/elements/llyr/tools/vf3_to_hwr.py +++ /dev/null @@ -1,57 +0,0 @@ -#!/usr/bin/python3 -import sys -import getopt -import numpy as np - -input_file = "" -output_file = "" - -opts, argv = getopt.getopt(sys.argv[1:], 'i:o:') - -for opt, arg in opts: - if opt in( "-i", "--ifile" ): - input_file = arg - elif opt in( "-o", "--ofile" ): - output_file = arg - -first_line = 0 -node_list = 0 -temp_edges = 0 -dotFile = open( output_file, "w" ) -parse = open( input_file, 'r' ) - -dotFile.write( "digraph \"Hardware Description\" {\n" ) -#dotFile.write( "layout=sfdp\n" ) -#dotFile.write( "node [shape=plaintext]\n" ) - -for line in parse: - if( first_line == 0 ): - first_line = 1 - elif( line == "\n" ): - if( node_list == 0 ): - node_list = 1 - elif( node_list == 1 ): - node_list = 2 - temp_edges = 1 - else: - temp_edges = 1 - else: - tempLine = line.strip().split() - if( node_list == 1 ): - dotFile.write( str(tempLine[0]) + " " + "[label=any]") - dotFile.write( "\n" ) - elif( temp_edges == 1 ): - temp_edges = 0 - else: - dotFile.write( str(tempLine[0]) + "--" + str(tempLine[1]) ) - dotFile.write( "\n" ) - -dotFile.write( "}\n" ) - -parse.close() -dotFile.close() - - #for file in ; do ../convert_dot.py -i $file -o "${file}.dot"; done - #for file in gemm*; do if [[ $file != *".vf3" ]]; then echo $file; fi done - #for file in gemm*; do if [[ $file == *".vf3" ]]; then ../convert_dot.py -i $file -o "${file}.dot"; fi done - diff --git a/src/sst/elements/opal/Makefile.am b/src/sst/elements/opal/Makefile.am deleted file mode 100644 index e01442bbb1..0000000000 --- a/src/sst/elements/opal/Makefile.am +++ /dev/null @@ -1,34 +0,0 @@ -# -*- Makefile -*- -# -# - -AM_CPPFLAGS += \ - $(MPI_CPPFLAGS) \ - -I$(top_srcdir)/src - -AM_LDFLAGS = -lm -compdir = $(pkglibdir) -comp_LTLIBRARIES = libOpal.la - -libOpal_la_SOURCES = \ - mempool.h \ - mempool.cc \ - opal.cc \ - opal.h \ - opal_event.h \ - arielmemmgr_opal.cc \ - arielmemmgr_opal.h \ - opalMemNIC.cc \ - opalMemNIC.h \ - page_fault_handler.cc \ - page_fault_handler.h - -libOpal_la_LDFLAGS = \ - -avoid-version - -libOpal_la_LIBADD = \ - $(SST_SYSTEMC_LIB) - -install-exec-hook: - $(SST_REGISTER_TOOL) SST_ELEMENT_SOURCE Opal=$(abs_srcdir) - $(SST_REGISTER_TOOL) SST_ELEMENT_TESTS Opal=$(abs_srcdir)/tests diff --git a/src/sst/elements/opal/README b/src/sst/elements/opal/README deleted file mode 100644 index 22081a5b25..0000000000 --- a/src/sst/elements/opal/README +++ /dev/null @@ -1,20 +0,0 @@ - - -Opal is a centralized memory manager that will be used to request physical memory from the system - -Opal will be configured and exposed to the details (capacity and memory types) of the shared memory system. - -A single or multiple/hirarchical (in case of disaggregated memory system) Opal components should be instantiated and connected to: - -1- Samba unit --- A- Minor page faults (or first access to virtual page) in Samba will trigger a request for a physical page from Opal manager - B- Any updates or page migration requests to Opal will trigger TLB shootdown to be sent to all samba units - -2- Ariel (or other core types) --- - A- unmap requests can be sent directly to Opal to deallocate physical space, which might also result in Opal sending TLB shootdowns to Samba units - B- Although virtual memory allocations (mmap, new, malloc) do not immediately allocate physical space, sometimes they carry out some hints such as memory type, huge page preference, pinning hints. Accordingly, Opal need to keep track of such preferences for specific virtual address ranges of different virtual memory spaces. - -Assumptions and limitations: -* In real systems, libc or any memory allocation library typically return allocated physical space to the OS through unmap, either when unmap is called by the application or when enough chunks have been returned by the applications through **free/delete**. For instance, if free/delete tries to delete a large allocation, then it can be beneficial to immediately return its corresponding physical allocation to the OS, so other applications can allocate it. Also when there are many contigious chunks that have been freed by the application and they can be coalecsed in multiple virtual pages, then the memory allocation library might return them back to the OS through unmap. Due to the overhead (TLB shootdown and context switching) of returning physical pages to the OS, memory allocation libraries try to reuse the **freed/deleted** virtual spaces. Accordingly, since we don't simulate libc or memory allocation library behavior, we only return physical spaces to Opal when unmap is called by the applications, and do not do anything when free/delete is called. - -Possible TODO: -When free is called for a **really** large allocation, we can assume the libc (or other memory allocation libraries) will immediately return this space to Opal diff --git a/src/sst/elements/opal/arielmemmgr_opal.cc b/src/sst/elements/opal/arielmemmgr_opal.cc deleted file mode 100644 index 2966be7c80..0000000000 --- a/src/sst/elements/opal/arielmemmgr_opal.cc +++ /dev/null @@ -1,118 +0,0 @@ -// Copyright 2009-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#include -#include "arielmemmgr_opal.h" -#include "opal_event.h" - -#include - - -using namespace SST::OpalComponent; - -MemoryManagerOpal::MemoryManagerOpal(ComponentId_t id, Params& params) : - ArielComponent::ArielMemoryManager(id, params) { - - // Find links - std::string linkprefix = "opal_link_"; - std::string linkname = linkprefix + "0"; - int numPorts = 0; - - std::string latency = params.find("opal_latency", "32ps"); - - while (isPortConnected(linkname)) { - SST::Link* link = configureLink(linkname, latency, new Event::Handler(this, &MemoryManagerOpal::handleInterrupt)); - opalLink.push_back(link); - numPorts++; - linkname = linkprefix + std::to_string(numPorts); - } - - std::string translatorstr = params.find("translator", "ariel.MemoryManagerSimple"); - if (NULL != (temp_translator = loadUserSubComponent("translator"))) { - output->verbose(CALL_INFO, 1, 0, "Opal is using named subcomponent translator\n"); - } else { - int memLevels = params.find("memmgr.memorylevels", 1); - if (translatorstr == "ariel.MemoryManagerSimple" && memLevels > 1) { - output->verbose(CALL_INFO, 1, 0, "Warning - the default 'ariel.MemoryManagerSimple' does not support multiple memory levels. Configuring anyways but memorylevels will be 1.\n"); - params.insert("memmgr.memorylevels", "1", true); - } - output->verbose(CALL_INFO, 1, 0, "Loading memory manager: %s\n", translatorstr.c_str()); - Params translatorParams = params.get_scoped_params("memmgr"); - temp_translator = loadAnonymousSubComponent(translatorstr, "translator", 0, ComponentInfo::SHARE_STATS | ComponentInfo::INSERT_STATS, translatorParams); - if (NULL == temp_translator) - output->fatal(CALL_INFO, -1, "Failed to load memory manager: %s\n", translatorstr.c_str()); - } -} - - -MemoryManagerOpal::~MemoryManagerOpal() { -} - - -void MemoryManagerOpal::handleInterrupt(SST::Event *event) { - OpalEvent * ev = dynamic_cast(event); // TODO can we static_cast instead? - output->verbose(CALL_INFO, 4, 0, "Core %" PRIu32 " handling opal interrupt event\n", ev->getCoreId()); - - switch(ev->getType()) { - case SST::OpalComponent::EventType::SHOOTDOWN: - (*(interruptHandler[ev->getCoreId()]))(ArielComponent::ArielMemoryManager::InterruptAction::STALL); - break; - case SST::OpalComponent::EventType::SDACK: - (*(interruptHandler[ev->getCoreId()]))(ArielComponent::ArielMemoryManager::InterruptAction::UNSTALL); - break; - default: - output->fatal(CALL_INFO, -4, "Opal event interrupt to core: %" PRIu32 " was not valid.\n", ev->getCoreId()); - } -} - -bool MemoryManagerOpal::allocateMalloc(const uint64_t size, const uint32_t level, const uint64_t addr, const uint64_t ip, const uint32_t thread) { - OpalEvent * tse = new OpalEvent(OpalComponent::EventType::HINT, level, addr, size, thread); - opalLink[thread]->send(tse); - - return temp_translator->allocateMalloc(size, level, addr, ip, thread); -} - -bool MemoryManagerOpal::allocateMMAP(const uint64_t size, const uint32_t level, const uint64_t addr, const uint64_t ip, const uint32_t file, const uint32_t thread) { - OpalEvent * tse = new OpalEvent(OpalComponent::EventType::HINT, level, addr, size, thread); - tse->setFileId(file); - output->output("Before sending to Opal.. file ID is: %" PRIu32 "\n", file); - opalLink[thread]->send(tse); - return true; -} - -void MemoryManagerOpal::freeMalloc(const uint64_t virtualAddress) { - temp_translator->freeMalloc(virtualAddress); -} - -void MemoryManagerOpal::freeMMAP(const uint32_t file) { -} - - -void MemoryManagerOpal::setDefaultPool(uint32_t pool) { - temp_translator->setDefaultPool(pool); -} - -uint32_t MemoryManagerOpal::getDefaultPool() { - return temp_translator->getDefaultPool(); -} - -uint64_t MemoryManagerOpal::translateAddress(uint64_t virtAddr) { - return temp_translator->translateAddress(virtAddr); -} - -void MemoryManagerOpal::printStats() { - temp_translator->printStats(); -} - diff --git a/src/sst/elements/opal/arielmemmgr_opal.h b/src/sst/elements/opal/arielmemmgr_opal.h deleted file mode 100644 index ae001b1c33..0000000000 --- a/src/sst/elements/opal/arielmemmgr_opal.h +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2009-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - - -#ifndef _H_OPAL_MEM_MANAGER_OPAL -#define _H_OPAL_MEM_MANAGER_OPAL - -#include -#include - -#include "sst/elements/ariel/arielmemmgr.h" - -#include -#include -#include -#include - -using namespace SST; - -namespace SST { -namespace OpalComponent { - -class MemoryManagerOpal : public ArielComponent::ArielMemoryManager { - - public: - /* SST ELI */ - SST_ELI_REGISTER_SUBCOMPONENT( - MemoryManagerOpal, - "Opal", - "MemoryManagerOpal", - SST_ELI_ELEMENT_VERSION(1,0,0), - "Memory manager which uses the Opal memory allocation component", - SST::ArielComponent::ArielMemoryManager - ) - - SST_ELI_DOCUMENT_PARAMS( - { "corecount", "Sets the verbosity of the memory manager output", "1"}, - { "opal_latency", "latency to communicate to the Opal manager", "32ps"}, - { "translator", "(temporary) translation memory manager to actually translate addresses for now", "MemoryManagerSimple"} ) - - SST_ELI_DOCUMENT_PORTS( - {"opal_link_%(corecound)d", "Each core's link to the Opal memory manager", {"Opal.OpalEvent"}} - ) - - SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS( - { "translator", "Temporarily, which memory manager to use to translate addresses", "SST::Ariel::ArielMemoryManager" } - ) - - /* MemoryManagerOpal */ - MemoryManagerOpal(ComponentId_t id, Params& params); - ~MemoryManagerOpal(); - - /* Call through to temporary translator */ - void setDefaultPool(uint32_t pool); - uint32_t getDefaultPool(); - - uint64_t translateAddress(uint64_t virtAddr); - void printStats(); - - /* Call through to Opal */ - bool allocateMalloc(const uint64_t size, const uint32_t level, const uint64_t virtualAddress, const uint64_t instructionPointer, const uint32_t thread); - bool allocateMMAP(const uint64_t size, const uint32_t level, const uint64_t virtualAddress, const uint64_t instructionPointer, const uint32_t file, const uint32_t thread); - void freeMalloc(const uint64_t vAddr); - void freeMMAP(const uint32_t file); - - void handleInterrupt(SST::Event * event); - - private: - ArielMemoryManager* temp_translator; // Temporary while Opal still uses Ariel's built-in translator - - std::vector opalLink; -}; - -} -} - -#endif diff --git a/src/sst/elements/opal/configure.m4 b/src/sst/elements/opal/configure.m4 deleted file mode 100644 index 958d5d4b31..0000000000 --- a/src/sst/elements/opal/configure.m4 +++ /dev/null @@ -1,6 +0,0 @@ -dnl -*- Autoconf -*- - -AC_DEFUN([SST_opal_CONFIG], [ - sst_opal_comp_happy="yes" -AS_IF([test "x$sst_opal_comp_happy" = "xyes"], [$1], [$2]) -]) diff --git a/src/sst/elements/opal/mempool.cc b/src/sst/elements/opal/mempool.cc deleted file mode 100644 index 870303488e..0000000000 --- a/src/sst/elements/opal/mempool.cc +++ /dev/null @@ -1,294 +0,0 @@ -// Copyright 2009-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2024, NTESS -// All rights reserved. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. -// - -#include - -#include "mempool.h" - -#include -#include - -//Constructor for pool -Pool::Pool(Params params, SST::OpalComponent::MemType mem_type, int id) -{ - - output = new SST::Output("OpalMemPool[@f:@l:@p] ", 16, 0, SST::Output::STDOUT); - - size = params.find("size", 0); // in KB's - - start = params.find("start", 0); - - frsize = params.find("frame_size", 4); //4KB frame size - - memType = mem_type; - - poolId = id; - - /*char* subID = (char*) malloc(sizeof(char) * 32); - sprintf(subID, "%" PRIu32, id); - - if(memType == SST::OpalComponent::MemType::LOCAL) { - memUsage = own->registerStatistic( "local_mem_usage", subID ); - mappedMemory = own->registerStatistic( "local_mem_mapped", subID ); - unmappedMemory = own->registerStatistic( "local_mem_unmapped", subID ); - tlbShootdowns = own->registerStatistic( "tlb_shootdowns", subID ); - tlbShootdownDelay = own->registerStatistic( "tlb_shootdown_delay", subID ); - } - else { - memUsage = own->registerStatistic( "shared_mem_usage", subID ); - mappedMemory = own->registerStatistic( "shared_mem_mapped", subID ); - unmappedMemory = own->registerStatistic( "shared_mem_unmapped", subID ); - } - - free(subID); - */ - /* memory technology - * 0: DRAM - * 1: NVRAM - */ - uint32_t mem_tech = (uint32_t) params.find("mem_tech", 0); - switch(mem_tech) - { - case 1: - memTech = SST::OpalComponent::MemTech::NVM; - break; - case 0: - default: - memTech = SST::OpalComponent::MemTech::DRAM; - } - - std::cerr << "Pool start: " << start << " size: " << size << " frame size: " << frsize << " mem tech: " << mem_tech << std::endl; - build_mem(); - -} - -//Create free frames of size framesize, note that the size is in KB -void Pool::build_mem() -{ - int i=0; - num_frames = ceil(size/frsize); - real_size = num_frames * frsize; - - //std::vector numbers; - //for(int i=0; i frames_allocated; - - // Fixme: Shuffle memory to make continuous memory available - while(frames) { - // Make sure pool has free frames in the requested memory pool type. If not deallocate allocated frames (if number of frames to be allocated are > 1) - if(freelist.empty()) { - while(!frames_allocated.empty()) { - Frame *frame = frames_allocated.front(); - freelist.push_back(frame->starting_address); - alloclist.erase(frame->starting_address); - frames_allocated.pop_front(); - available_frames++; - delete frame; - } - break; - } - else - { - uint64_t frameAddr = freelist.front(); - freelist.erase(freelist.begin()); - Frame *frame = new Frame(frameAddr,0); - alloclist[frame->starting_address] = frame; - frames_allocated.push_back(frame); - available_frames--; - } - frames--; - - } - - if(!frames_allocated.empty()) { - response.address = (frames_allocated.front())->starting_address; - response.pages = pages; - response.status = 1; - } - - return response; - -} - -// Allocate N contigiuous frames, returns the starting address if successfull, or -1 if it fails! -REQRESPONSE Pool::allocate_frame(int N) -{ - - REQRESPONSE response; - response.status = 0; - - - // Make sure we have free frames first - if(freelist.empty()) - return response; - - // For now, we will assume you can only allocate 1 frame, TODO: We will implemenet a buddy-allocator style that enables allocating contigous physical spaces - if(N>1) - return response; - - else - { - // Simply, pop the first free frame and assign it - uint64_t frameAddr = freelist.front(); - freelist.erase(freelist.begin()); - Frame *temp = new Frame(frameAddr,0); - alloclist[temp->starting_address] = temp; - available_frames--; - response.address = temp->starting_address; - response.pages = 1; - response.status = 1; - return response; - - } - -} - -/* Deallocate 'size' contigiuous memory of type 'memType' starting from physical address 'starting_pAddress', - * returns a structure which indicates whether the memory is successfully deallocated or not - */ -REQRESPONSE Pool::deallocate_frames(int pages, uint64_t starting_pAddress) -{ - - REQRESPONSE response; - int frames = pages; - uint64_t pAddress = starting_pAddress; - uint64_t frame_number; - - while(frames) { - - // If we can find the frame to be free in the allocated list - std::map::iterator it; - it = alloclist.find(pAddress); - if (it != alloclist.end()) - { - //Remove from allocation map and add to free list - Frame *temp = it->second; - freelist.push_back(temp->starting_address); - alloclist.erase(pAddress); - delete temp; - } - else - { - response.address = pAddress; //physical address of the frame which failed to deallocate. - response.pages = frames; //This indicates number of frames that are not deallocated. - response.status = 0; - return response; - } - - frame_number = (pAddress - start) / frsize * 1024; - pAddress += ((uint64_t) (frame_number+1)*frsize*1024) + start; //to get the next frame physical address - frames--; - } - - response.status = 1; //successfully deallocated - return response; -} - -// Freeing N frames starting from Address X, this will return -1 if we find that these frames were not allocated -REQRESPONSE Pool::deallocate_frame(uint64_t X, int N) -{ - - REQRESPONSE response; - response.status = 0; - - - // For now, we will assume you can free only 1 frame, TODO: We will implemenet a buddy-allocator style that enables allocating and freeing contigous physical spaces - if(N>1) - return response; - else - { - // If we can find the frame to be free in the allocated list - if(alloclist.find(X)!=alloclist.end()) - { - // Remove from allocation map and add to free list - Frame * temp = alloclist[X]; - freelist.push_back(temp->starting_address); - alloclist.erase(X); - delete temp; - available_frames++; - response.status = 1; - } - else // Means we couldn't find an allocated frame that is being unmapped - response.status = 0; - - } - - return response; -} - -bool Pool::isAllocated(uint64_t address) -{ - if(alloclist.find(address)==alloclist.end()) - return false; - - return true; -} - -/*REQRESPONSE Pool::allocate_frame_address(uint64_t address) -{ - - REQRESPONSE response; - response.status = 0; - - // Make sure we have free frames first - if(freelist.empty()) - return response; - - // For now, we will assume you can only allocate 1 frame, TODO: We will implemenet a buddy-allocator style that enables allocating contigous physical spaces - if(N>1) - return response; - - else - { - int index = freelist_index[address]; - Frame * temp = freelist[index]; - //freelist.remove(temp); - freelist.erase(freelist.begin()+index); - alloclist[temp->starting_address] = temp; - available_frames--; - response.address = temp->starting_address; - response.pages = 1; - response.status = 1; - return response; - - } - -}*/ - - diff --git a/src/sst/elements/opal/mempool.h b/src/sst/elements/opal/mempool.h deleted file mode 100644 index dab6bf8ebf..0000000000 --- a/src/sst/elements/opal/mempool.h +++ /dev/null @@ -1,151 +0,0 @@ -// Copyright 2009-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2024, NTESS -// All rights reserved. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. -// - -#include "opal_event.h" - -#include -#include -#include - - -typedef struct reqresponse { - uint64_t address; - int pages; - int status; - -}REQRESPONSE; - - -// This defines a physical frame of size 4KB by default -class Frame{ - - public: - // Constructor - Frame() { starting_address = 0; metadata = 0;} - - // Constructor with paramteres - Frame(uint64_t st, uint64_t md) { starting_address = st; metadata = 0;} - - ~Frame(){} - - // The starting address of the frame - uint64_t starting_address; - - // This will be used to store information about current allocation - int metadata; - - int frame_number; - -}; - - -// This class defines a memory pool - -class Pool{ - - public: - - //Constructor for pool - Pool(Params parmas, SST::OpalComponent::MemType mem_type, int id); - - ~Pool() { -/* while(!freelist.empty()) { - Frame* frame = freelist.front(); - freelist.erase(freelist.begin()); - delete frame; - } -*/ - std::map::iterator it; - for(it=alloclist.begin();it!=alloclist.end();it++) { - Frame* frame = it->second; - delete frame; - } - } - - void finish() {} - - // The size of the memory pool in KBs - uint32_t size; - - // The starting address of the memory pool - uint64_t start; - - // Allocate N contigiuous frames, returns the starting address if successfull, or -1 if it fails! - REQRESPONSE allocate_frame(int N); - - // Allocate 'size' contigiuous memory, returns a structure with starting address and number of frames allocated - REQRESPONSE allocate_frames(int pages); - - REQRESPONSE allocate_frame_address(uint64_t address, int N); - - // Freeing N frames starting from Address X, this will return -1 if we find that these frames were not allocated - REQRESPONSE deallocate_frame(uint64_t X, int N); - - // Deallocate 'size' contigiuous memory starting from physical address 'starting_pAddress', returns a structure which indicates success or not - REQRESPONSE deallocate_frames(int size, uint64_t starting_pAddress); - - bool isAllocated(uint64_t address); - - // Current number of free frames - int freeframes() { return freelist.size(); } - - // Frame size in KBs - int frsize; - - //Total number of frames - int num_frames; - - //real size of the memory pool - uint32_t real_size; - - //number of free frames - int available_frames; - - void set_memPool_type(SST::OpalComponent::MemType _memType) { memType = _memType; } - - SST::OpalComponent::MemType get_memPool_type() { return memType; } - - void set_memPool_tech(SST::OpalComponent::MemTech _memTech) { memTech = _memTech; } - - SST::OpalComponent::MemTech get_memPool_tech() { return memTech; } - - void setMemID(int id) { poolId = id; } - - int getMemID() { return poolId; } - - void build_mem(); - - void profileStats(int stat, int value); - - private: - - Output *output; - - //memory pool id - int poolId; - - //shared or local - SST::OpalComponent::MemType memType; - - //Memory technology - SST::OpalComponent::MemTech memTech; - - // The list of free frames - std::list freelist; - - //std::map freelist_index; - - // The list of allocated frames --- the key is the starting physical address - std::map alloclist; - -}; - diff --git a/src/sst/elements/opal/opal.cc b/src/sst/elements/opal/opal.cc deleted file mode 100644 index df3aa28048..0000000000 --- a/src/sst/elements/opal/opal.cc +++ /dev/null @@ -1,554 +0,0 @@ -// Copyright 2009-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2024, NTESS -// All rights reserved. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. -// - -#include -#include "opal.h" - -#include -#include - -using namespace SST; -using namespace SST::OpalComponent; - - -#define OPAL_VERBOSE(LEVEL, OUTPUT) if(verbosity >= (LEVEL)) OUTPUT - - -Opal::Opal(SST::ComponentId_t id, SST::Params& params): Component(id) { - - - registerAsPrimaryComponent(); - - verbosity = (uint32_t) params.find("verbose", 1); - output = new SST::Output("OpalComponent[@f:@l:@p] ", verbosity, 0, SST::Output::STDOUT); - - max_inst = (uint32_t) params.find("max_inst", 1); - num_nodes = (uint32_t) params.find("num_nodes", 1); - nodeInfo = new NodePrivateInfo*[num_nodes]; - num_cores = 0; - num_memCntrls = 0; - - cycles = 0; - - opalBase = new OpalBase(); - - char* buffer = (char*) malloc(sizeof(char) * 256); - size_t buffer_size = sizeof(char) * 256; - - /* Configuring shared memory */ - /*----------------------------------------------------------------------------------------*/ - num_shared_mempools = params.find("shared_mempools", 0); - std::cerr << getName().c_str() << "Number of Shared Memory Pools: "<< num_shared_mempools << endl; - - Params sharedMemParams = params.get_scoped_params("shared_mem"); - shared_mem_size = 0; - - sharedMemoryInfo = new MemoryPrivateInfo*[num_shared_mempools]; - - for(uint32_t i = 0; i < num_shared_mempools; i++) { - memset(buffer, 0 , 256); - snprintf(buffer, buffer_size, "mempool%" PRIu32 "", i); - Params memPoolParams = sharedMemParams.get_scoped_params(buffer); - sharedMemoryInfo[i] = new MemoryPrivateInfo(opalBase, i, memPoolParams); - std::cerr << getName().c_str() << "Configuring Shared " << buffer << std::endl; - shared_mem_size += memPoolParams.find("size", 0); - memset(buffer, 0 , 256); - snprintf(buffer, buffer_size, "globalMemCntrLink%" PRIu32, i); - sharedMemoryInfo[i]->link = configureLink(buffer, "1ns", new Event::Handler((sharedMemoryInfo[i]), &MemoryPrivateInfo::handleRequest)); - } - - /* Configuring nodes */ - /*----------------------------------------------------------------------------------------*/ - for(uint32_t i = 0; i < num_nodes; i++) { - memset(buffer, 0 , 256); - snprintf(buffer, buffer_size, "node%" PRIu32 "", i); - Params nodePrivateParams = params.get_scoped_params(buffer); - nodeInfo[i] = new NodePrivateInfo(opalBase, i, nodePrivateParams); - for(uint32_t j=0; jcores; j++) { - memset(buffer, 0 , 256); - snprintf(buffer, buffer_size, "coreLink%" PRIu32, num_cores); - nodeInfo[i]->coreInfo[j].coreLink = configureLink(buffer, "1ns", new Event::Handler((&nodeInfo[i]->coreInfo[j]), &CorePrivateInfo::handleRequest)); - memset(buffer, 0 , 256); - snprintf(buffer, buffer_size, "mmuLink%" PRIu32, num_cores); - nodeInfo[i]->coreInfo[j].mmuLink = configureLink(buffer, "1ns", new Event::Handler((&nodeInfo[i]->coreInfo[j]), &CorePrivateInfo::handleRequest)); - num_cores++; - } - for(uint32_t j=0; jmemory_cntrls; j++) { - memset(buffer, 0 , 256); - snprintf(buffer, buffer_size, "memCntrLink%" PRIu32, num_memCntrls); - nodeInfo[i]->memCntrlInfo[j].link = configureLink(buffer, "1ns", new Event::Handler((&nodeInfo[i]->memCntrlInfo[j]), &MemoryPrivateInfo::handleRequest)); - num_memCntrls++; - } - - char* subID = (char*) malloc(sizeof(char) * 32); - snprintf(subID, sizeof(char) * 32, "%" PRIu32, i); - nodeInfo[i]->statLocalMemUsage = registerStatistic("local_mem_usage", subID ); - nodeInfo[i]->statSharedMemUsage = registerStatistic("shared_mem_usage", subID ); - free(subID); - } - - free(buffer); - - /* registering clock */ - /*----------------------------------------------------------------------------------------*/ - std::string cpu_clock = params.find("clock", "1GHz"); - std::cerr << "clock: "<< cpu_clock.c_str() << std::endl; - registerClock( cpu_clock, new Clock::Handler(this, &Opal::tick ) ); -} - - - -Opal::Opal() : Component(-1) -{ - // for serialization only - // -} - - -void Opal::setNextMemPool( int node, int fault_level ) -{ - switch(nodeInfo[node]->memoryAllocationPolicy) - { - case 8: - //alternate allocation policy 1:16 - nodeInfo[node]->nextallocmem = ( nodeInfo[node]->nextallocmem + 1 ) % 17; - nodeInfo[node]->allocatedmempool = nodeInfo[node]->nextallocmem; - break; - - case 7: - //alternate allocation policy 1:8 - nodeInfo[node]->nextallocmem = ( nodeInfo[node]->nextallocmem + 1 ) % 9; - nodeInfo[node]->allocatedmempool = nodeInfo[node]->nextallocmem; - break; - - case 6: - //alternate allocation policy 1:4 - nodeInfo[node]->nextallocmem = ( nodeInfo[node]->nextallocmem + 1 ) % 5; - nodeInfo[node]->allocatedmempool = nodeInfo[node]->nextallocmem; - break; - - case 5: - //alternate allocation policy 1:2 - nodeInfo[node]->nextallocmem = ( nodeInfo[node]->nextallocmem + 1 ) % 3; - nodeInfo[node]->allocatedmempool = nodeInfo[node]->nextallocmem; - break; - - case 4: - //random allocation policy - nodeInfo[node]->allocatedmempool = rand() % ( num_shared_mempools + 1 ); - break; - - case 3: - //proportional allocation policy - nodeInfo[node]->nextallocmem = ( nodeInfo[node]->nextallocmem + 1 ) % ( shared_mem_size/nodeInfo[node]->memory_size + 1 ); - nodeInfo[node]->allocatedmempool = nodeInfo[node]->nextallocmem ? ( nodeInfo[node]->allocatedmempool + 1 ) % ( num_shared_mempools + 1) - ? ( nodeInfo[node]->allocatedmempool + 1 ) % ( num_shared_mempools + 1) : 1 : 0; - - break; - - case 2: - //round robin allocation policy - nodeInfo[node]->nextallocmem = ( nodeInfo[node]->nextallocmem + 1 ) % ( num_shared_mempools + 1 ); - nodeInfo[node]->allocatedmempool = nodeInfo[node]->nextallocmem; - break; - - case 1: - //alternate allocation policy - if( nodeInfo[node]->allocatedmempool != 0) { - nodeInfo[node]->allocatedmempool = 0; - } else { - nodeInfo[node]->nextallocmem = ( nodeInfo[node]->nextallocmem + 1 ) % ( num_shared_mempools + 1 ) - ? ( nodeInfo[node]->nextallocmem + 1 ) % ( num_shared_mempools + 1 ) : 1; - nodeInfo[node]->allocatedmempool = nodeInfo[node]->nextallocmem; - } - break; - - case 0: - default: - //local memory first - nodeInfo[node]->allocatedmempool = 0; - break; - - } - -} - -void Opal::processHint(int node, int fileId, uint64_t vAddress, int size) -{ - - std::map*, std::vector* > >::iterator fileIdHint = opalBase->mmapFileIdHints.find(fileId); - - //fileId is already registered by another node - if( fileIdHint != opalBase->mmapFileIdHints.end() ) - { - //search for nodeId - std::vector* it = (fileIdHint->second).first; - auto it1 = std::find(it->begin(), it->end(), node); - if( it1 != it->end() ) - { - std::cerr << "Memory is already allocated for fileId: " << fileIdHint->first << " in the same node" << std::endl; - } - else - { - int owner_node = it->back(); - if( nodeInfo[owner_node]->page_size != nodeInfo[node]->page_size) - output->fatal(CALL_INFO, -1, "Opal: Page sizes of the owner node which reserved space with fileId: %d and the requesting node are not same\n",fileId); - - it->push_back(node); - //(fileIdHint->second).first = it; - nodeInfo[node]->reservedSpace.insert(std::make_pair(vAddress/4096, std::make_pair(fileId, std::make_pair( ceil(size/(nodeInfo[node]->page_size)), 0)))); - } - } - else - { - std::vector *it = new std::vector; - std::vector *pa = new std::vector; - - it->push_back(node); - opalBase->mmapFileIdHints.insert(std::make_pair(fileId, std::make_pair( it, pa ))); - nodeInfo[node]->reservedSpace.insert(std::make_pair(vAddress/4096, std::make_pair(fileId, std::make_pair( ceil(size/(nodeInfo[node]->page_size)), 0)))); - - } -} - -REQRESPONSE Opal::isAddressReserved(int node, uint64_t vAddress) -{ - REQRESPONSE response; - response.status = 0; - - - for (std::map > >::iterator it= (nodeInfo[node]->reservedSpace).begin(); it!=(nodeInfo[node]->reservedSpace).end(); ++it) - { - uint64_t reservedVAddress = it->first; - int pages_reserved = (it->second).second.first; - if(reservedVAddress <= vAddress && vAddress < reservedVAddress + pages_reserved*nodeInfo[node]->page_size) { - response.status = 1; - response.address = reservedVAddress; - } - } - - return response; -} - -REQRESPONSE Opal::allocateSharedMemory(int node, int coreId, uint64_t vAddress, int fault_level, int pages) -{ - REQRESPONSE response; - response.status = 0; - - int sharedMemPoolId; - - if(nodeInfo[node]->memoryAllocationPolicy) { - - sharedMemPoolId = nodeInfo[node]->allocatedmempool - 1; - - } - else { - - for(uint32_t i = 0; ipool->available_frames >= pages ) - { - Pool *pool = sharedMemoryInfo[i]->pool; - for(int j=0; jallocate_frame(1); - if(!response.status) - output->fatal(CALL_INFO, -1, "Opal: Allocating shared memory. This should never happen\n"); - - nodeInfo[node]->profileEvent(SST::OpalComponent::MemType::SHARED); - } - - response.pages = pages; - response.status = 1; - break; - } - } - - if(!response.status) - output->fatal(CALL_INFO, -1, "Opal(%s): Memory is drained out\n",getName().c_str()); - - return response; - } - - if( sharedMemoryInfo[sharedMemPoolId]->pool->available_frames >= pages ) { - Pool *pool = sharedMemoryInfo[sharedMemPoolId]->pool; - for(int j=0; jallocate_frame(1); - if(!response.status) - output->fatal(CALL_INFO, -1, "Opal: Allocating shared memory. This should never happen\n"); - - nodeInfo[node]->profileEvent(SST::OpalComponent::MemType::SHARED); - } - - setNextMemPool( node,fault_level ); - response.pages = pages; - response.status = 1; - } - else - { - for(uint32_t i = 0; i < num_shared_mempools; i++) - { - setNextMemPool(node,fault_level); - if( !nodeInfo[node]->allocatedmempool ) // skip local memory - setNextMemPool( node,fault_level ); - - sharedMemPoolId = nodeInfo[node]->allocatedmempool - 1; - - if( sharedMemoryInfo[sharedMemPoolId]->pool->available_frames >= pages ) { - Pool *pool = sharedMemoryInfo[sharedMemPoolId]->pool; - for(int j=0; jallocate_frame(1); - if(!response.status) - output->fatal(CALL_INFO, -1, "Opal: Allocating shared memory. This should never happen\n"); - - nodeInfo[node]->profileEvent(SST::OpalComponent::MemType::SHARED); - } - - setNextMemPool( node,fault_level ); - response.pages = pages; - response.status = 1; - break; - } - } - - if(!response.status) - output->fatal(CALL_INFO, -1, "Opal: Memory is drained out\n"); - - } - - return response; -} - -REQRESPONSE Opal::allocateLocalMemory(int node, int coreId, uint64_t vAddress, int fault_level, int pages) -{ - - REQRESPONSE response; - response.status = 0; - - - if(nodeInfo[node]->pool->available_frames >= pages) { - Pool *pool = nodeInfo[node]->pool; - for(int i=0; iallocate_frame(1); - if(!response.status) - output->fatal(CALL_INFO, -1, "Opal: Allocating local memory. This should never happen\n"); - - nodeInfo[node]->profileEvent(SST::OpalComponent::MemType::LOCAL); - } - - response.pages = pages; - response.status = 1; - setNextMemPool( node,fault_level ); - } - else { - OPAL_VERBOSE(8, output->verbose(CALL_INFO, 8, 0, "Node%" PRIu32 " Local Memory is drained out\n", node)); - - setNextMemPool( node,fault_level ); - response = allocateSharedMemory(node, coreId, vAddress, fault_level, pages); - } - - - return response; - -} - -REQRESPONSE Opal::allocateFromReservedMemory(int node, uint64_t reserved_vAddress, uint64_t vAddress, int pages) -{ - REQRESPONSE response; - response.status = 0; - - int fileID = nodeInfo[node]->reservedSpace[reserved_vAddress].first; - int pages_reserved = nodeInfo[node]->reservedSpace[reserved_vAddress].second.first; - int pages_used = nodeInfo[node]->reservedSpace[reserved_vAddress].second.second; - - std::vector *reserved_pAddress = opalBase->mmapFileIdHints[fileID].second; - - //Allocate all the pages. TODO: pages can be reserved on demand instead of allocating all the pages at a time. But what if the memory is drained out. - if(reserved_pAddress->empty()) { - - for(uint32_t i = 0; ipool->available_frames >= pages ) { - Pool *pool = sharedMemoryInfo[i]->pool; - for(int j=0; jallocate_frame(1); - reserved_pAddress->push_back( response.address ); - - if(!response.status) - output->fatal(CALL_INFO, -1, "Opal: Allocating reserved memory. This should never happen\n"); - } - - response.pages = pages; - response.status = 1; - break; - } - - } - - if(!response.status) - output->fatal(CALL_INFO, -1, "Opal: memory not available to allocate memory for file ID: %d \n", fileID); - - } - - if( pages_used + pages <= pages_reserved ) - { - - auto it = reserved_pAddress->begin(); - std::advance(it, pages_used); - response.address = *it; - response.pages = pages; - response.status = 1; - nodeInfo[node]->reservedSpace[reserved_vAddress].second.second += pages; - - } - else - { - output->fatal(CALL_INFO, -1, "Opal: address :%" PRIu64 " requested with fileId:%d has no space left\n", vAddress, fileID); - } - - return response; -} - -bool Opal::processRequest(int node, int coreId, uint64_t vAddress, int fault_level, int size) -{ - - REQRESPONSE response; - response.status = 0; - - int pages = ceil(size/(nodeInfo[node]->page_size)); - - // If multiple pages are requested how are the physical addresses sent to the requester as in future sue to opal parallelization continuous addresses cannot be allocated - if(pages != 1) - output->fatal(CALL_INFO, -1, "Opal: currently opal does not support multiple page allocations\n"); - - // if the page fault request is for CR3 register allocate the memory from local memory - if(4 == fault_level) - response = allocateLocalMemory(node, coreId, vAddress, fault_level, pages); - else - { - - // check if memory is to be allocated from the reserved address space - response = isAddressReserved(node, vAddress); - - if( response.status ) - response = allocateFromReservedMemory(node, response.address, vAddress, pages); - - else { - if( !nodeInfo[node]->allocatedmempool ) { - response = allocateLocalMemory(node, coreId, vAddress, fault_level, pages); - //std::cerr << getName() << " Node: " << node << " core " << coreId << " response page address: " << vAddress << " allocated local address: " << response.address << " pages: "<< pages << " level: " << fault_level << std::endl; - } - else { - response = allocateSharedMemory(node, coreId, vAddress, fault_level, pages); - //std::cerr << getName() << " Node: " << node << " core " << coreId << " response page address: " << vAddress << " allocated shared address: " << response.address << " pages: " << " level: " << fault_level << std::endl; - } - } - } - - if( response.status ) { - OpalEvent *tse = new OpalEvent(EventType::RESPONSE); - tse->setResp(vAddress, response.address, response.pages*nodeInfo[node]->page_size); - tse->setCoreId(coreId); - nodeInfo[node]->coreInfo[coreId].mmuLink->send(tse); - } - else - output->fatal(CALL_INFO, -1, "Opal(%s): Memory is drained out\n",getName().c_str()); - - return true; - -} - - -bool Opal::tick(SST::Cycle_t x) -{ - cycles++; - - int inst_served = 0; - while(!opalBase->requestQ.empty()) { - if(inst_served < max_inst) { - OpalEvent *ev = opalBase->requestQ.front(); - bool removeEvent = true; - - switch(ev->getType()) { - case SST::OpalComponent::EventType::HINT: - { - std::cerr << getName().c_str() << " node: " << ev->getNodeId() << " core: "<< ev->getCoreId() << " request page address: " << ev->getAddress() << " hint" << std::endl; - } - break; - - case SST::OpalComponent::EventType::MMAP: - { - OPAL_VERBOSE(8, output->verbose(CALL_INFO, 8, 0, "Node%" PRIu32 " Opal has received an MMAP CALL\n", ev->getNodeId())); - std::cerr << "MLM mmap(" << ev->getFileId()<< ") : level "<< ev->getHint() << " Starting address is "<< std::hex << ev->getAddress(); - std::cerr << std::dec << " Size: "<< ev->getSize(); - std::cerr << " Ending address is " << std::hex << ev->getAddress() + ev->getSize() - 1; - std::cerr << std::dec << std::endl; - //size should be in the multiple of page size (4096) from ariel core - //processHint(ev->getNodeId(), ev->getFileId(), ev->getAddress(), ev->getSize()); - } - break; - - case SST::OpalComponent::EventType::UNMAP: - { - std::cerr << getName().c_str() << " node: " << ev->getNodeId() << " core: "<< ev->getCoreId() << " request page address: " << ev->getAddress() << " unmap"<< std::endl; - OPAL_VERBOSE(8, output->verbose(CALL_INFO, 8, 0, "Node%" PRIu32 " Opal has received an UNMAP CALL\n", ev->getNodeId())); - } - break; - - case SST::OpalComponent::EventType::REQUEST: - { - removeEvent = processRequest(ev->getNodeId(), ev->getCoreId(), ev->getAddress(), ev->getFaultLevel(), ev->getSize()); - } - break; - - default: - output->fatal(CALL_INFO, -1, "%s, Error - Unknown request\n", getName().c_str()); - break; - - } - - if(!removeEvent) { - break; - } - - opalBase->requestQ.pop(); - delete ev; - inst_served++; - } - else { - output->verbose(CALL_INFO, 2, 0, "%s, number of requests served has reached maximum width in the given time slot \n", getName().c_str()); - break; - } - } - - return false; -} - - -void Opal::finish() -{ - for(uint32_t i = 0; i < num_nodes; i++ ) - nodeInfo[i]->pool->finish(); - - for(uint32_t i = 0; i < num_shared_mempools; i++ ) - sharedMemoryInfo[i]->pool->finish(); - -} - -void Opal::deallocateSharedMemory(uint64_t page, int N) -{ - for(uint32_t sm=0; smcontains(page)) { - sharedMemoryInfo[sm]->pool->deallocate_frame(page, 1); - break; - } -} - diff --git a/src/sst/elements/opal/opal.h b/src/sst/elements/opal/opal.h deleted file mode 100644 index 00ea9c36c9..0000000000 --- a/src/sst/elements/opal/opal.h +++ /dev/null @@ -1,379 +0,0 @@ -// Copyright 2009-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2024, NTESS -// All rights reserved. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. -// - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "opal_event.h" -#include "mempool.h" - -using namespace SST; - -using namespace SST::OpalComponent; - - -namespace SST -{ - namespace OpalComponent - { - - class OpalBase { - - public: - OpalBase() { } - - ~OpalBase() { - - while( !requestQ.empty() ) { - delete requestQ.front(); - requestQ.pop(); - } - - std::map*, std::vector* > >::iterator it; - for(it=mmapFileIdHints.begin(); it!=mmapFileIdHints.end(); it++){ - delete (it->second).second; - delete (it->second).first; - } - } - - std::queue requestQ; // stores page fault requests, hints and shootdown acknowledgement events from all the cores - - std::map*, std::vector* > > mmapFileIdHints; // used to store reserved memory which is useful for inter-node communication - }; - - class MemoryPrivateInfo - { - public: - - OpalBase *opalBase; - - int nodeId; - - int memContrlId; - - SST::Link * link; - - unsigned int latency; - - Pool* pool; - - MemoryPrivateInfo() { } - - MemoryPrivateInfo(OpalBase *base, uint32_t _id, Params params) - { - memContrlId = _id; - opalBase = base; - latency = (uint32_t) params.find("latency", 1); - pool = new Pool(params, SST::OpalComponent::MemType::SHARED, _id); - } - - ~MemoryPrivateInfo() { - delete pool; - } - - void setOwner(OpalBase *base) { opalBase = base; } - - void handleRequest(SST::Event* e) - { - OpalEvent *ev = static_cast (e); - ev->setMemContrlId(memContrlId); - delete ev; // delete event from memory controller for now - } - - bool contains(uint64_t page) - { - return ((pool->start <= page) && (page < pool->start + pool->num_frames*pool->frsize)) ? true : false; - } - }; - - class CorePrivateInfo - { - public: - - OpalBase *opalBase; - - int nodeId; - - int coreId; - - uint64_t cr3; - - SST::Link * coreLink; - - SST::Link * mmuLink; - - float ipc; - - CorePrivateInfo() { } - - ~CorePrivateInfo() { } - - unsigned int latency; - - void setOwner(OpalBase *base) { opalBase = base; } - - void handleRequest(SST::Event* e) - { - OpalEvent *ev = static_cast (e); - ev->setNodeId(nodeId); - ev->setCoreId(coreId); - opalBase->requestQ.push(ev); - } - - }; - - class NodePrivateInfo - { - public: - - OpalBase *opalBase; - - uint32_t clock; // clock rate of Opal - - uint32_t node_num; // stores the node number of this node - - uint32_t cores; // stores number of cores in this node - - uint32_t memory_cntrls; // stores number of memory controllers available in this node - - uint32_t latency; // latency to communicate with Core, MMU and Memory controller units - - CorePrivateInfo *coreInfo; // stores core specific information of this node - - MemoryPrivateInfo *memCntrlInfo; // stores memory controller information of this node - - uint32_t memoryAllocationPolicy; // used for deciding memory allocation policies - - int nextallocmem; // stores next memory pool to allocate memory from - - int allocatedmempool; // used to store current allocated memory pool - - Pool* pool; // local memory pool which maintains memory utilization - allocated and free pages - - uint64_t page_size; // page size of the node in KB's - - uint64_t memory_size; // local memory size in terms of number of pages - - uint64_t num_pages; // number of pages in local memory - - uint32_t pages_available; // used to check number of pages free in local memory - - std::map > > reservedSpace; // stores pages that are reserved by nodes. these can be shared by other nodes for inter-node communication. fileds: virtual address, fileId, size - - Statistic* statLocalMemUsage; - Statistic* statSharedMemUsage; - - NodePrivateInfo(OpalBase *base, uint32_t node, Params params) - { - opalBase = base; - node_num = node; - clock = (uint32_t) params.find("clock", 2000); // in MHz - cores = (uint32_t) params.find("cores", 1); - memory_cntrls = (uint32_t) params.find("memory_cntrls", 1); - latency = (uint32_t) params.find("latency", 2000); //2us - - memoryAllocationPolicy = (uint32_t) params.find("allocation_policy", 0); - nextallocmem = 0; - allocatedmempool = 0; - - pool = new Pool((Params) params.get_scoped_params("memory"), SST::OpalComponent::MemType::LOCAL, node); - memory_size = (uint32_t) params.find("memory.size", 1); // in KB's - page_size = (uint32_t) params.find("memory.frame_size", 4); - page_size = page_size * 1024; - - coreInfo = new CorePrivateInfo[cores]; - for(uint32_t i=0; iaddData(1); - } - else{ - statSharedMemUsage->addData(1); - } - } - - }; - - class Opal : public SST::Component - { - public: - - Opal( SST::ComponentId_t id, SST::Params& params); - - void setup() { }; - - void finish(); - - bool tick(SST::Cycle_t x); - - void setNextMemPool( int node,int fault_level ); - - REQRESPONSE allocateLocalMemory(int node, int coreId, uint64_t vAddress, int fault_level, int pages); - - REQRESPONSE allocateSharedMemory(int node, int coreId, uint64_t vAddress, int fault_level, int pages); - - REQRESPONSE allocateFromReservedMemory(int node, uint64_t reserved_vAddress, uint64_t vAddress, int pages); - - REQRESPONSE isAddressReserved(int node, uint64_t vAddress); - - bool processRequest(int node, int coreId, uint64_t vAddress, int fault_level, int size); - - void processHint(int node, int fileId, uint64_t vAddress, int size); - - void deallocateSharedMemory(uint64_t page, int N); - - ~Opal() { - for(uint32_t i=0; i -#include "opalMemNIC.h" - -using namespace SST; -using namespace SST::Opal; - -/* Constructor */ - -OpalMemNIC::OpalMemNIC(ComponentId_t id, Params ¶ms, TimeConverter* tc) : SST::MemHierarchy::MemNICBase(id, params, tc) { - - node = params.find("node", 0); - enable = params.find("shared_memory", true); - localMemSize = params.find("local_memory_size", 0); - - /* Set up link control */ - link_control = loadUserSubComponent("linkcontrol", ComponentInfo::SHARE_NONE, 1); - if (!link_control) { - Params lcparams; - lcparams.insert("port_name", "port"); - lcparams.insert("link_bw", params.find("network_bw", "80GiB/s")); - lcparams.insert("in_buf_size", params.find("network_input_buffer_size", "1KiB")); - lcparams.insert("out_buf_size", params.find("network_output_buffer_size", "1KiB")); - std::string lcSub = params.find("linkcontrol", "merlin.linkcontrol"); - link_control = loadAnonymousSubComponent(lcSub, "linkcontrol", 0, ComponentInfo::SHARE_PORTS | ComponentInfo::INSERT_STATS, lcparams, 1); - } - link_control->setNotifyOnReceive(new SST::Interfaces::SimpleNetwork::Handler(this, &OpalMemNIC::recvNotify)); - - packetHeaderBytes = extractPacketHeaderSize(params, "min_packet_size"); -} - -void OpalMemNIC::init(unsigned int phase) { - link_control->init(phase); - MemNICBase::nicInit(link_control, phase); -} - -bool OpalMemNIC::clock() { - if (sendQueue.empty()) return true; - drainQueue(&sendQueue, link_control); - return false; -} - -bool OpalMemNIC::recvNotify(int) { - MemRtrEvent * mre = doRecv(link_control); - if (mre) { - MemHierarchy::MemEventBase * me = mre->takeEvent(); - delete mre; - if (me) { - (*recvHandler)(me); - } - } - return true; -} - -void OpalMemNIC::send(MemHierarchy::MemEventBase * ev) { - SST::Interfaces::SimpleNetwork::Request * req = new SST::Interfaces::SimpleNetwork::Request(); - MemRtrEvent * mre = new MemRtrEvent(ev); - req->src = info.addr; - req->dest = lookupNetworkAddress(ev->getDst()); - req->size_in_bits = 8 * (packetHeaderBytes + ev->getPayloadSize()); - req->vn = 0; - req->givePayload(mre); - sendQueue.push(req); -} - - -/* Add 'node' to InitMemRtrEvent */ -MemHierarchy::MemNICBase::InitMemRtrEvent * OpalMemNIC::createInitMemRtrEvent() { - return new OpalInitMemRtrEvent(info, node); -} - -void OpalMemNIC::processInitMemRtrEvent(MemHierarchy::MemNICBase::InitMemRtrEvent * ev) { - OpalInitMemRtrEvent* imre = static_cast(ev); - dbg.debug(_L10_, "%s (OpalMemNIC) received imre. Name: %s, Addr: %" PRIu64 ", ID: %" PRIu32 ", start: %" PRIu64 ", end: %" PRIu64 ", size: %" PRIu64 ", step: %" PRIu64 ", node: %" PRIu32 "\n", - getName().c_str(), imre->info.name.c_str(), imre->info.addr, imre->info.id, imre->info.region.start, imre->info.region.end, imre->info.region.interleaveSize, imre->info.region.interleaveStep, imre->node); - - if (sourceIDs.find(imre->info.id) != sourceIDs.end()) { // From one of our source groups - dbg.debug(_L10_, "\tAdding to sourceEndpointInfo. %zu sources found\n", sourceEndpointInfo.size()); - addSource(imre->info); - } else if (destIDs.find(imre->info.id) != destIDs.end()) { // From one of our dest groups - // Filter by node if sharedmem is enabled - if (enable) { - if (imre->node == node || imre->node == 9999) { - dbg.debug(_L10_, "\tAdding to destEndpointInfo. %zu destinations found\n", destEndpointInfo.size()); - addDest(imre->info); - } - } else { - dbg.debug(_L10_, "\tAdding to destEndpointInfo. %zu destinations found\n", destEndpointInfo.size()); - addDest(imre->info); - } - } -} - - -std::string OpalMemNIC::findTargetDestination(MemHierarchy::Addr addr) { - for (std::set::const_iterator it = destEndpointInfo.begin(); it != destEndpointInfo.end(); it++) { - if (it->region.contains(addr)) return it->name; - } - - if (enable && localMemSize) { - MemHierarchy::Addr tempAddr = addr & (localMemSize-1); - for (std::set::const_iterator it = destEndpointInfo.begin(); it != destEndpointInfo.end(); it++) { - if(it->region.contains(tempAddr)) return it->name; - } - } - - /* Build error string */ - stringstream error; - error << getName() + " (OpalMemNIC) cannot find a destination for address " << addr << endl; - error << "Known destination regions: " << endl; - for (std::set::const_iterator it = destEndpointInfo.begin(); it != destEndpointInfo.end(); it++) { - error << it->name << " " << it->region.toString() << endl; - } - dbg.fatal(CALL_INFO, -1, "%s", error.str().c_str()); - return ""; -} diff --git a/src/sst/elements/opal/opalMemNIC.h b/src/sst/elements/opal/opalMemNIC.h deleted file mode 100644 index 2ac5a4cb64..0000000000 --- a/src/sst/elements/opal/opalMemNIC.h +++ /dev/null @@ -1,114 +0,0 @@ -// Copyright 2013-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2013-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _OPAL_MEMNIC_SUBCOMPONENT_H_ -#define _OPAL_MEMNIC_SUBCOMPONENT_H_ - -#include -#include - -#include "sst/elements/memHierarchy/memNICBase.h" - -namespace SST { -namespace Opal { - -/* - * NIC for multi-node configurations - */ -class OpalMemNIC : public SST::MemHierarchy::MemNICBase { - -public: -/* Element Library Info */ -#define OPAL_MEMNIC_ELI_PARAMS MEMNICBASE_ELI_PARAMS, \ - { "node", "Node number in multinode environment", "0"},\ - { "shared_memory", "Shared meory enable flag", "0"},\ - { "local_memory_size", "Local memory size to mask local memory addresses", "0"},\ - { "network_bw", "Network bandwidth", "80GiB/s" },\ - { "network_input_buffer_size", "Size of input buffer", "1KiB" },\ - { "network_output_buffer_size", "Size of output buffer", "1KiB" },\ - { "min_packet_size", "Size of packet with a payload (e.g., control message size)", "8B"} - - - SST_ELI_REGISTER_SUBCOMPONENT(OpalMemNIC, "Opal", "OpalMemNIC", SST_ELI_ELEMENT_VERSION(1,0,0), - "MemNIC for Opal multi-node configurations", SST::MemHierarchy::MemLinkBase) - - SST_ELI_DOCUMENT_PARAMS( OPAL_MEMNIC_ELI_PARAMS ) - - SST_ELI_DOCUMENT_PORTS( {"port", "Link to network", {"memHierarchy.MemRtrEvent"} } ) - -/* Begin class definition */ - - /* Constructor */ - OpalMemNIC(ComponentId_t id, Params ¶ms, TimeConverter* tc); - - /* Destructor */ - virtual ~OpalMemNIC() { } - - /* Specialized init mem rtr event to specify node */ - class OpalInitMemRtrEvent : public MemHierarchy::MemNICBase::InitMemRtrEvent { - public: - uint32_t node; - - OpalInitMemRtrEvent() {} - OpalInitMemRtrEvent(EndpointInfo info, uint32_t node) : InitMemRtrEvent(info), node(node) { } - - virtual Event* clone(void) override { - OpalInitMemRtrEvent * imre = new OpalInitMemRtrEvent(*this); - if (this->event != nullptr) - imre->event = this->event->clone(); - else - imre->event = nullptr; - return imre; - } - - virtual bool hasClientData() const override { return false; } - - void serialize_order(SST::Core::Serialization::serializer &ser) override { - InitMemRtrEvent::serialize_order(ser); - ser & node; - } - - ImplementSerializable(SST::Opal::OpalMemNIC::OpalInitMemRtrEvent); - }; - - bool clock(); - void send(MemHierarchy::MemEventBase *ev); - - bool recvNotify(int); - - void init(unsigned int phase); - void finish() { link_control->finish(); } - void setup() { link_control->setup(); MemLinkBase::setup(); } - - virtual std::string findTargetDestination(MemHierarchy::Addr addr); - -protected: - virtual MemHierarchy::MemNICBase::InitMemRtrEvent* createInitMemRtrEvent(); - virtual void processInitMemRtrEvent(MemHierarchy::MemNICBase::InitMemRtrEvent* ev); - -private: - bool enable; - uint64_t localMemSize; - uint32_t node; - - size_t packetHeaderBytes; - SST::Interfaces::SimpleNetwork * link_control; - std::queue sendQueue; -}; - -} //namespace Opal -} //namespace SST - -#endif diff --git a/src/sst/elements/opal/opal_event.h b/src/sst/elements/opal/opal_event.h deleted file mode 100644 index 8f1f9bf2bd..0000000000 --- a/src/sst/elements/opal/opal_event.h +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright 2009-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2024, NTESS -// All rights reserved. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. -// - -#ifndef _H_SST_OPAL_EVENT -#define _H_SST_OPAL_EVENT - -#include -#include -#include <../memHierarchy/memEvent.h> - -#include -#include -#include - -using namespace SST; - - -namespace SST{ namespace OpalComponent{ - - enum EventType { HINT, MMAP, REQUEST, RESPONSE, UNMAP, UMAPACK, SHOOTDOWN, REMAP, SDACK, ARIEL_ENABLED, HALT, PAGE_REFERENCE, PAGE_REFERENCE_END, IPC_INFO }; - enum MemType { LOCAL, SHARED }; - enum MemTech { DRAM, NVM, HBM, HMC, SCRATCHPAD, BURSTBUFFER}; - -// **************** Important ***************** -// Levels hints are: 0 for DRAM -// 1: NVM -// 2: HBM -// 3: HMC -// 4: SCRATCHPAD -// 5: BURSTBUFFER - - // Thie defines a class for events of Opal - class OpalEvent : public SST::Event - { - - private: - OpalEvent() {} // For serialization - - EventType ev; - uint64_t address; - uint64_t paddress; - int faultLevel; - int size; // to redcue packet size, size valiable we used size variable for multiple pusrposes. 1. size of the page fault. 2. shoowdownID. 3. local and global page reference - uint32_t nodeId; - uint32_t coreId; - MemType memType; - int hint; - int fileId; - int memContrlId; - bool invalidate; - - public: - - OpalEvent(EventType y) : SST::Event() - { ev = y; memType = SST::OpalComponent::MemType::LOCAL; invalidate = false;} - - OpalEvent(EventType y, const uint32_t level, const uint64_t virtualAddress, const uint64_t size_, const uint32_t thread) : SST::Event() - { - ev = y; - memType = SST::OpalComponent::MemType::LOCAL; - invalidate = false; - size =size_; - faultLevel = level; - address = virtualAddress; - coreId = thread; - } - - void setType(int ev1) { ev = static_cast(ev1);} - int getType() { return ev; } - - void setMemType(int mtype) { memType = static_cast(mtype);} - MemType getMemType() { return memType; } - - void setNodeId(uint32_t id) { nodeId = id; } - uint32_t getNodeId() { return nodeId; } - - void setCoreId(uint32_t id) { coreId = id; } - uint32_t getCoreId() { return coreId; } - - void setResp(uint64_t add, uint64_t padd, int sz) { address = add; paddress = padd; size = sz;} - - void setAddress(uint64_t add) { address = add; } - uint64_t getAddress() { return address; } - - void setPAddress(uint64_t add) { paddress = add; } - uint64_t getPaddress() { return paddress; } - - void setSize(int size_) { size = size_; } - int getSize() { return size; } - - void setFaultLevel(int level) { faultLevel = level; } - int getFaultLevel() { return faultLevel; } - - void setInvalidate() { invalidate = true; } - bool getInvalidate() { return invalidate; } - - void setFileId(int id) { fileId = id; } - int getFileId() { return fileId; } - - void setHint(int x) { hint = x; } - int getHint() { return hint; } - - void setMemContrlId(int id) { memContrlId = id; } - int getMemContrlId() { return memContrlId; } - - void serialize_order(SST::Core::Serialization::serializer &ser) override { - Event::serialize_order(ser); - ser & ev; - ser & address; - ser & paddress; - ser & faultLevel; - ser & size; - ser & nodeId; - ser & coreId; - ser & memType; - ser & hint; - ser & fileId; - ser & memContrlId; - ser & invalidate; - } - - - ImplementSerializable(OpalEvent); - - }; - - -}} - -#endif - diff --git a/src/sst/elements/opal/page_fault_handler.cc b/src/sst/elements/opal/page_fault_handler.cc deleted file mode 100644 index bfe53b6133..0000000000 --- a/src/sst/elements/opal/page_fault_handler.cc +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2009-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -/* Author: Vamsee Reddy Kommareddy - * E-mail: vamseereddy8@knights.ucf.edu - */ - -#include - -#include "page_fault_handler.h" -#include "opal_event.h" - -#include - - -using namespace SST::OpalComponent; - -PageFaultHandler::PageFaultHandler(ComponentId_t id, Params& params) : - SambaComponent::PageFaultHandler(id, params) { - - // Find links - std::string linkprefix = "opal_link_"; - std::string linkname = linkprefix + "0"; - int numPorts = 0; - - std::string latency = params.find("opal_latency", "32ps"); - - while (isPortConnected(linkname)) { - SST::Link* link = configureLink(linkname, latency, new Event::Handler(this, &PageFaultHandler::handleEvent)); - opalLink.push_back(link); - numPorts++; - linkname = linkprefix + std::to_string(numPorts); - } -} - - -PageFaultHandler::~PageFaultHandler() { -} - - -void PageFaultHandler::handleEvent(SST::Event *event) { - OpalEvent * ev = static_cast(event); - output->verbose(CALL_INFO, 4, 0, "Core %" PRIu32 " handling opal page fault event\n", ev->getCoreId()); - - PageFaultHandlerPacket pkt; - - switch(ev->getType()) { - case SST::OpalComponent::EventType::RESPONSE: - pkt.action = PageFaultHandlerAction::RESPONSE; - break; - default: - output->fatal(CALL_INFO, -4, "Opal event interrupt to core: %" PRIu32 " was not valid.\n", ev->getCoreId()); - } - - pkt.vAddress = ev->getAddress(); - pkt.pAddress = ev->getPaddress(); - pkt.size = 4096; - (*(pageFaultHandlerInterface[ev->getCoreId()]))(pkt); - - delete ev; -} - -void PageFaultHandler::allocatePage(const uint32_t thread, const uint32_t level, const uint64_t virtualAddress, const uint64_t size) { - OpalEvent * tse = new OpalEvent(OpalComponent::EventType::REQUEST); - tse->setResp(virtualAddress, 0, size); - tse->setFaultLevel(level); - opalLink[thread]->send(tse); - -} - diff --git a/src/sst/elements/opal/page_fault_handler.h b/src/sst/elements/opal/page_fault_handler.h deleted file mode 100644 index ff1eca81a9..0000000000 --- a/src/sst/elements/opal/page_fault_handler.h +++ /dev/null @@ -1,71 +0,0 @@ -// Copyright 2009-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - - -#ifndef _H_OPAL_PAGE_FAULT_HANDLER_OPAL -#define _H_OPAL_PAGE_FAULT_HANDLER_OPAL - -#include -#include - -#include "sst/elements/samba/page_fault_handler.h" - -#include -#include -#include -#include - -using namespace SST; - -namespace SST { -namespace OpalComponent { - -class PageFaultHandler : public SambaComponent::PageFaultHandler { - - public: - /* SST ELI */ - SST_ELI_REGISTER_SUBCOMPONENT( - PageFaultHandler, - "Opal", - "PageFaultHandler", - SST_ELI_ELEMENT_VERSION(1,0,0), - "Page fault hander uses the Opal memory allocation component", - SST::SambaComponent::PageFaultHandler - ) - - SST_ELI_DOCUMENT_PARAMS( - { "opal_latency", "latency to communicate to the Opal manager", "32ps"} - ) - - SST_ELI_DOCUMENT_PORTS( - {"opal_link_%(corecound)d", "Each core's mmu link to the Opal page fault handler", {"Opal.OpalEvent"}} - ) - - /* MemoryManagerOpal */ - PageFaultHandler(ComponentId_t id, Params& params); - ~PageFaultHandler(); - - void handleEvent(SST::Event * event); - void allocatePage(const uint32_t thread, const uint32_t level, const uint64_t virtualAddress, const uint64_t size); - - private: - - std::vector opalLink; -}; - -} -} - -#endif diff --git a/src/sst/elements/opal/tests/app/makefile b/src/sst/elements/opal/tests/app/makefile deleted file mode 100644 index 879671b16f..0000000000 --- a/src/sst/elements/opal/tests/app/makefile +++ /dev/null @@ -1,19 +0,0 @@ -CXX=g++ - -opal_test: opal_test.o - $(CXX) -O0 -o opal_test -fopenmp opal_test.o - -opal_test.o: opal_test.c - $(CXX) -O0 -o opal_test.o -fopenmp -c opal_test.c - -opal_mlm: opal_mlm.o - $(CXX) -O0 -o opal_mlm -fopenmp opal_mlm.o - -opal_mlm.o: opal_mlm.c - $(CXX) -O0 -o opal_mlm.o -fopenmp -c opal_mlm.c - -all: opal_test opal_mlm - -clean: - rm opal_test opal_test.o opal_mlm opal_mlm.o - diff --git a/src/sst/elements/opal/tests/app/opal_mlm.c b/src/sst/elements/opal/tests/app/opal_mlm.c deleted file mode 100644 index 64b744bdce..0000000000 --- a/src/sst/elements/opal/tests/app/opal_mlm.c +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2009-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#include -#include -#include -#include - -#include - - -#define NUM_INTS 1000000 - -using namespace std; - -#pragma GCC push_options -#pragma GCC optimize ("O0") - -extern "C" { - -void* mlm_malloc(size_t size, int level) -{ - if(size == 0) - { - printf("ZERO BYTE MALLOC\n"); - void* bt_entries[64]; - exit(-1); - } - - printf("Performing a mlm Malloc for size %llu\n", size); - - return malloc(size); -} - -void ariel_enable() { } - -void * ariel_mmap_mlm(int ID, size_t size, int level) { return mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, ID, 0); } - -} - -#pragma GCC pop_options - -int main() -{ - ariel_enable(); - int * x = (int *) malloc(sizeof(int) * NUM_INTS); - int * y = (int *) mlm_malloc(sizeof(int) * NUM_INTS, 1); - - int * z = (int *) ariel_mmap_mlm(660066, sizeof(int) * NUM_INTS, 2); - - for (int i = 0; i < NUM_INTS; ) - { - cout << "*** " << i << " ***\n"; - x[i] = i * 2; - y[i] = i * 2; - z[i] = i * 2; - i = i + 1024; - } - - for (int i = 0; i < NUM_INTS; ) - { - cout << "Value is: " << x[i] << ", " << y[i] << ", " << z[i] << endl; - i = i + 1024; - } - - cout << "Test MLM Malloc" << endl; - - - return 0; -} diff --git a/src/sst/elements/opal/tests/app/opal_test.c b/src/sst/elements/opal/tests/app/opal_test.c deleted file mode 100644 index 93e75b39a1..0000000000 --- a/src/sst/elements/opal/tests/app/opal_test.c +++ /dev/null @@ -1,62 +0,0 @@ -// Copyright 2009-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#include -#include - -extern "C" -void ariel_enable() { printf("Inside Ariel\n"); } - -int main(int argc, char* argv[]) { - - const int LENGTH = 2000; - - ariel_enable(); - - printf("Allocating arrays of size %d elements.\n", LENGTH); - double* a = (double*) malloc(sizeof(double) * LENGTH); - double* b = (double*) malloc(sizeof(double) * LENGTH); - double* c = (double*) malloc(sizeof(double) * LENGTH); - printf("Done allocating arrays.\n"); - - int i; - for(i = 0; i < LENGTH; ++i) { - a[i] = i; - b[i] = LENGTH - i; - c[i] = 0; - } - - - printf("Perfoming the fast_c compute loop...\n"); - #pragma omp parallel num_threads(2) - for(i = 0; i < LENGTH; ++i) { - //printf("issuing a write to: %llu (fast_c)\n", ((unsigned long long int) &fast_c[i])); - c[i] = 2.0 * a[i] + 1.5 * b[i]; - } - - double sum = 0; - for(i = 0; i < LENGTH; ++i) { - sum += c[i]; - } - - printf("Sum of arrays is: %f\n", sum); - printf("Freeing arrays...\n"); - - free(a); - free(b); - free(c); - - printf("Done.\n"); -} diff --git a/src/sst/elements/opal/tests/basic_1node_1smp.py b/src/sst/elements/opal/tests/basic_1node_1smp.py deleted file mode 100644 index cd3cf44586..0000000000 --- a/src/sst/elements/opal/tests/basic_1node_1smp.py +++ /dev/null @@ -1,415 +0,0 @@ -import sst - - -# Define SST core options -sst.setProgramOption("timebase", "1ps") - -# Tell SST what statistics handling we want -sst.setStatisticLoadLevel(4) - -clock = "2GHz" - -cores = 2*2 - -#os.environ['OMP_NUM_THREADS'] = str(cores/2) - - -local_memory_capacity = 128 # Size of memory in MBs -shared_memory_capacity = 2048 # 2GB -shared_memory = 1 -page_size = 4 # In KB -num_pages = local_memory_capacity * 1024 // page_size + 8*1024*1024//page_size - - -ariel = sst.Component("cpu", "ariel.ariel") -ariel.addParams({ - "verbose" : 1, - "clock" : clock, - "maxcorequeue" : 1024, - "maxissuepercycle" : 2, - "maxtranscore": 16, - "pipetimeout" : 0, - "corecount" : cores//2, - "arielmode" : 0, - "appargcount" : 0, - "max_insts" : 10000, - "executable" : "./app/opal_test", - "node" : 0, - "launchparamcount" : 1, - "launchparam0" : "-ifeellucky", -}) - -# Opal uses this memory manager to intercept memory translation requests, mallocs, mmaps, etc. -memmgr = ariel.setSubComponent("memmgr", "Opal.MemoryManagerOpal") -memmgr.addParams({ - "opal_latency" : "30ps" -}) -# Opal uses this memory manager (for now?) to do the actual translation -submemmgr = memmgr.setSubComponent("translator", "ariel.MemoryManagerSimple") -submemmgr.addParams({ - "pagecount0" : num_pages, - "pagesize0" : page_size * 1024, -}) - -ariel.enableAllStatistics({"type":"sst.AccumulatorStatistic"}) - -mmu = sst.Component("mmu", "Samba") -mmu.addParams({ - "os_page_size": 4, - "perfect": 0, - "corecount": cores//2, - "sizes_L1": 3, - "page_size1_L1": 4, - "page_size2_L1": 2048, - "page_size3_L1": 1024*1024, - "assoc1_L1": 4, - "size1_L1": 32, - "assoc2_L1": 4, - "size2_L1": 32, - "assoc3_L1": 4, - "size3_L1": 4, - "sizes_L2": 4, - "page_size1_L2": 4, - "page_size2_L2": 2048, - "page_size3_L2": 1024*1024, - "assoc1_L2": 12, - "size1_L2": 1536,#1536, - "assoc2_L2": 32, #12, - "size2_L2": 32, #1536, - "assoc3_L2": 4, - "size3_L2": 16, - "clock": clock, - "levels": 2, - "max_width_L1": 3, - "max_outstanding_L1": 2, - "max_outstanding_PTWC": 2, - "latency_L1": 4, - "parallel_mode_L1": 1, - "max_outstanding_L2": 2, - "max_width_L2": 4, - "latency_L2": 10, - "parallel_mode_L2": 0, - "self_connected" : 0, - "page_walk_latency": 200, - "size1_PTWC": 32, # this just indicates the number entries of the page table walk cache level 1 (PTEs) - "assoc1_PTWC": 4, # this just indicates the associtativit the page table walk cache level 1 (PTEs) - "size2_PTWC": 32, # this just indicates the number entries of the page table walk cache level 2 (PMDs) - "assoc2_PTWC": 4, # this just indicates the associtativit the page table walk cache level 2 (PMDs) - "size3_PTWC": 32, # this just indicates the number entries of the page table walk cache level 3 (PUDs) - "assoc3_PTWC": 4, # this just indicates the associtativit the page table walk cache level 3 (PUDs) - "size4_PTWC": 32, # this just indicates the number entries of the page table walk cache level 4 (PGD) - "assoc4_PTWC": 4, # this just indicates the associtativit the page table walk cache level 4 (PGD) - "latency_PTWC": 10, # This is the latency of checking the page table walk cache - "opal_latency": "30ps", - "emulate_faults": 1, -}) -mmu.enableAllStatistics({"type":"sst.AccumulatorStatistic"}) - -# MMU uses this page fault handler. -pagefaulthandler = mmu.setSubComponent("pagefaulthandler", "Opal.PageFaultHandler") -pagefaulthandler.addParams({ - "opal_latency" : "30ps" -}) - -opal= sst.Component("opal","Opal") -opal.addParams({ - "clock" : clock, - "num_nodes" : 1, - "verbose" : 1, - "max_inst" : 32, - "shared_mempools" : 1, - "shared_mem.mempool0.start" : local_memory_capacity*1024*1024, - "shared_mem.mempool0.size" : shared_memory_capacity*1024, - "shared_mem.mempool0.frame_size": page_size, - "shared_mem.mempool0.mem_type" : 0, - "node0.cores" : cores//2, - "node0.allocation_policy" : 1, - "node0.page_migration" : 0, - "node0.page_migration_policy" : 0, - "node0.num_pages_to_migrate" : 0, - "node0.latency" : 2000, - "node0.memory.start" : 0, - "node0.memory.size" : local_memory_capacity*1024, - "node0.memory.frame_size" : page_size, - "node0.memory.mem_type" : 0, - "num_ports" : cores, -}) -opal.enableAllStatistics({"type":"sst.AccumulatorStatistic"}) - - -l1_params = { - "cache_frequency": clock, - "cache_size": "32KiB", - "associativity": 8, - "access_latency_cycles": 4, - "L1": 1, - "verbose": 30, - "maxRequestDelay" : "1000000", -} - -l2_params = { - "cache_frequency": clock, - "cache_size": "256KiB", - "associativity": 8, - "access_latency_cycles": 6, - "mshr_num_entries" : 16, -} - -l3_params = { - "access_latency_cycles" : "12", - "cache_frequency" : clock, - "associativity" : "16", - "cache_size" : "2MB", - "mshr_num_entries" : "4096", - "num_cache_slices" : 1, - "slice_allocation_policy" : "rr", -} - -link_params = { - "shared_memory": shared_memory, - "node": 0, -} - -nic_params = { - "shared_memory": shared_memory, - "node": 0, - "network_bw": "96GiB/s", - "local_memory_size" : local_memory_capacity*1024*1024, -} - - - -class Network: - def __init__(self, name,networkId,input_latency,output_latency): - self.name = name - self.ports = 0 - self.rtr = sst.Component("rtr_%s"%name, "merlin.hr_router") - self.rtr.addParams({ - "id": networkId, - #"topology": "merlin.singlerouter", - "link_bw" : "80GiB/s", - "xbar_bw" : "80GiB/s", - "flit_size" : "8B", - "input_latency" : input_latency, - "output_latency" : output_latency, - "input_buf_size" : "1KB", - "output_buf_size" : "1KB", - }) - - topo = self.rtr.setSubComponent("topology", "merlin.singlerouter") - - def getNextPort(self): - self.ports += 1 - self.rtr.addParam("num_ports", self.ports) - return (self.ports-1) - - - -internal_network = Network("internal_network",0,"20ps","20ps") - -for next_core in range(cores): - - l1 = sst.Component("l1cache_" + str(next_core), "memHierarchy.Cache") - l1.addParams(l1_params) - l1_cpulink = l1.setSubComponent("cpulink", "memHierarchy.MemLink") - l1_memlink = l1.setSubComponent("memlink", "memHierarchy.MemLink") - l1_cpulink.addParams(link_params) - l1_memlink.addParams(link_params) - - l2 = sst.Component("l2cache_" + str(next_core), "memHierarchy.Cache") - l2.addParams(l2_params) - l2_cpulink = l2.setSubComponent("cpulink", "memHierarchy.MemLink") - l2_memlink = l2.setSubComponent("memlink", "Opal.OpalMemNIC") - l2_cpulink.addParams(link_params) - l2_memlink.addParams(nic_params) - l2_memlink.addParams({ "group" : 1}) - - arielMMULink = sst.Link("cpu_mmu_link_" + str(next_core)) - MMUCacheLink = sst.Link("mmu_cache_link_" + str(next_core)) - PTWMemLink = sst.Link("ptw_mem_link_" + str(next_core)) - PTWOpalLink = sst.Link("ptw_opal_" + str(next_core)) - ArielOpalLink = sst.Link("ariel_opal_" + str(next_core)) - - if next_core < cores//2: - arielMMULink.connect((ariel, "cache_link_%d"%next_core, "300ps"), (mmu, "cpu_to_mmu%d"%next_core, "300ps")) - ArielOpalLink.connect((memmgr, "opal_link_%d"%next_core, "300ps"), (opal, "coreLink%d"%(next_core), "300ps")) - MMUCacheLink.connect((mmu, "mmu_to_cache%d"%next_core, "300ps"), (l1_cpulink, "port", "300ps")) - PTWOpalLink.connect( (pagefaulthandler, "opal_link_%d"%next_core, "300ps"), (opal, "mmuLink%d"%(next_core), "300ps") ) - else: - PTWMemLink.connect((mmu, "ptw_to_mem%d"%(next_core-cores//2), "300ps"), (l1_cpulink, "port", "300ps")) - - l2_core_link = sst.Link("l2cache_" + str(next_core) + "_link") - l2_core_link.connect((l1_memlink, "port", "300ps"), (l2_cpulink, "port", "300ps")) - - l2_ring_link = sst.Link("l2_ring_link_" + str(next_core)) - l2_ring_link.connect((l2_memlink, "port", "300ps"), (internal_network.rtr, "port%d"%(internal_network.getNextPort()), "300ps")) - - - -l3cache = sst.Component("l3cache", "memHierarchy.Cache") -l3cache.addParams(l3_params) -l3_link = l3cache.setSubComponent("cpulink", "Opal.OpalMemNIC") -l3cache.addParams({ "slice_id" : 0 }) -l3_link.addParams(nic_params) -l3_link.addParams({ - "group" : 2, - "addr_range_start": 0, - "addr_range_end": (local_memory_capacity*1024*1024) - 1, - "interleave_size": "0B", -}) - -l3_ring_link = sst.Link("l3_link") -l3_ring_link.connect( (l3_link, "port", "300ps"), (internal_network.rtr, "port%d"%(internal_network.getNextPort()), "300ps")) - - -mem = sst.Component("local_memory", "memHierarchy.MemController") -mem.addParams({ - "clock" : "1.2GHz", - "backing" : "none", - "backend" : "memHierarchy.timingDRAM", - "backend.id" : 0, - "backend.addrMapper" : "memHierarchy.roundRobinAddrMapper", - "backend.addrMapper.interleave_size" : "64B", - "backend.addrMapper.row_size" : "1KiB", - "backend.clock" : "1.2GHz", - "backend.mem_size" : str(local_memory_capacity) + "MiB", - "backend.channels" : 2, - "backend.channel.numRanks" : 2, - "backend.channel.rank.numBanks" : 16, - "backend.channel.transaction_Q_size" : 32, - "backend.channel.rank.bank.CL" : 14, - "backend.channel.rank.bank.CL_WR" : 12, - "backend.channel.rank.bank.RCD" : 14, - "backend.channel.rank.bank.TRP" : 14, - "backend.channel.rank.bank.dataCycles" : 2, - "backend.channel.rank.bank.pagePolicy" : "memHierarchy.simplePagePolicy", - "backend.channel.rank.bank.transactionQ" : "memHierarchy.fifoTransactionQ", - "backend.channel.rank.bank.pagePolicy.close" : 1, -}) -mem_link = mem.setSubComponent("cpulink", "memHierarchy.MemLink") -mem_link.addParams({ - "shared_memory": 1, - "node" : 0 -}) - -dc = sst.Component("dc", "memHierarchy.DirectoryController") -dc.addParams({ - "entry_cache_size": 256*1024*1024, #Entry cache size of mem/blocksize - "clock": "200MHz", - #"debug" : 1, - #"debug_level" : 10, -}) - -dc_cpulink = dc.setSubComponent("cpulink", "Opal.OpalMemNIC") -dc_memlink = dc.setSubComponent("memlink", "memHierarchy.MemLink") -dc_memlink.addParams(link_params) -dc_cpulink.addParams(nic_params) -dc_cpulink.addParams({ - "group" : 3, - "addr_range_start" : 0, - "addr_range_end" : (local_memory_capacity*1024*1024)-1, - "interleave_size": "0B", - "shared_memory": shared_memory, - "node": 0, - #"debug" : 1, - #"debug_level" : 10, -}) - -memLink = sst.Link("mem_link") -memLink.connect((mem_link, "port", "300ps"), (dc_memlink, "port", "300ps")) - -netLink = sst.Link("dc_link") -netLink.connect((dc_cpulink, "port", "300ps"), (internal_network.rtr, "port%d"%(internal_network.getNextPort()), "300ps")) - - - - - -# External memory configuration - -external_network = Network("Ext_Mem_Net",1,"20ns","20ns") -port = external_network.getNextPort() - -ext_mem = sst.Component("ExternalNVMmemContr", "memHierarchy.MemController") -ext_mem.addParams({ - "memory_size" : str(shared_memory_capacity) + "MB", - "max_requests_per_cycle" : 4, - "backing" : "none", - "clock" : clock, -}) - -ext_memory = ext_mem.setSubComponent("backend", "memHierarchy.Messier") -ext_memory.addParams({ - "max_requests_per_cycle" : 4, - "mem_size" : str(shared_memory_capacity) + "MB", - "clock" : clock, -}) - -ext_mem_link = ext_mem.setSubComponent("cpulink", "memHierarchy.MemLink") -ext_mem_link.addParams({ "node" : 9999, }) ## does not belong to any node - -ext_dc = sst.Component("ExtMemDc", "memHierarchy.DirectoryController") -ext_dc.addParams({ - "entry_cache_size": 256*1024*1024, #Entry cache size of mem/blocksize - "clock": "1GHz", -}) -ext_dc_cpulink = ext_dc.setSubComponent("cpulink", "Opal.OpalMemNIC") -ext_dc_memlink = ext_dc.setSubComponent("memlink", "memHierarchy.MemLink") -ext_dc_cpulink.addParams({ - "network_bw": "80GiB/s", - "addr_range_start" : (local_memory_capacity*1024*1024), - "addr_range_end" : (local_memory_capacity*1024*1024) + (shared_memory_capacity*1024*1024) -1, - "node": 9999, - "group" : 3, # TODO is this the right routing group? means sources are all components in group 2 and dests are all components in group 4 -}) - - -messier = sst.Component("ExternalMem" , "Messier") -messier.addParams({ - "clock" : clock, - "tCL" : 30, - "tRCD" : 300, - "tCL_W" : 1000, - "write_buffer_size" : 32, - "flush_th" : 90, - "num_banks" : 16, - "max_outstanding" : 16, - "max_writes" : "4", - "max_current_weight" : 32*50, - "read_weight" : "5", - "write_weight" : "5", - "cacheline_interleaving" : 0, -}) - -link_nvm_bus_link = sst.Link("External_mem_nvm_link") -link_nvm_bus_link.connect( (messier, "bus", "50ps"), (ext_memory, "nvm_link", "50ps") ) - -extmemLink = sst.Link("External_mem_dc_link") -extmemLink.connect( (ext_dc_memlink, "port", "500ps"), (ext_mem_link, "port", "500ps") ) - -ext_dcLink = sst.Link("External_mem_link") -ext_dcLink.connect( (ext_dc_cpulink, "port", "500ps"), (external_network.rtr, "port%d"%port, "500ps") ) - - - -# Connecting Internal and External network -def bridge(net0, net1): - net0port = net0.getNextPort() - net1port = net1.getNextPort() - name = "%s-%s"%(net0.name, net1.name) - bridge = sst.Component("Bridge:%s"%name, "merlin.Bridge") - bridge.addParams({ - "translator": "memHierarchy.MemNetBridge", - "network_bw" : "80GiB/s", - }) - link = sst.Link("B0-%s"%name) - link.connect( (bridge, "network0", "500ps"), (net0.rtr, "port%d"%net0port, "500ps") ) - link = sst.Link("B1-%s"%name) - link.connect( (bridge, "network1", "500ps"), (net1.rtr, "port%d"%net1port, "500ps") ) - - -midnet = Network("Bridge",3,"50ps","50ps") -bridge(internal_network, midnet) -bridge(external_network, midnet) - - diff --git a/src/sst/elements/opal/tests/basic_2node_1smp.py b/src/sst/elements/opal/tests/basic_2node_1smp.py deleted file mode 100644 index 32dc35d3c6..0000000000 --- a/src/sst/elements/opal/tests/basic_2node_1smp.py +++ /dev/null @@ -1,430 +0,0 @@ -import sst - - -# Define SST core options -sst.setProgramOption("timebase", "1ps") - -# Tell SST what statistics handling we want -sst.setStatisticLoadLevel(4) - -clock = "2GHz" - -cores = 2*2 - -#os.environ['OMP_NUM_THREADS'] = str(cores/2) - -nodes = 2 -local_memory_capacity = 128 # Size of memory in MBs -shared_memory_capacity = 2048 # 2GB -shared_memory = 1 -page_size = 4 # In KB -num_pages = local_memory_capacity * 1024 // page_size + 8*1024*1024//page_size - - -arielParams = { - "verbose" : 1, - "clock" : clock, - "maxcorequeue" : 1024, - "maxissuepercycle" : 2, - "maxtranscore": 16, - "pipetimeout" : 0, - "corecount" : cores//2, - "arielmode" : 0, - "appargcount" : 0, - "max_insts" : 10000, - "executable" : "./app/opal_test", - "node" : 0, - "launchparamcount" : 1, - "launchparam0" : "-ifeellucky", -} - -mmuParams = { - "os_page_size": 4, - "perfect": 0, - "corecount": cores//2, - "sizes_L1": 3, - "page_size1_L1": 4, - "page_size2_L1": 2048, - "page_size3_L1": 1024*1024, - "assoc1_L1": 4, - "size1_L1": 32, - "assoc2_L1": 4, - "size2_L1": 32, - "assoc3_L1": 4, - "size3_L1": 4, - "sizes_L2": 4, - "page_size1_L2": 4, - "page_size2_L2": 2048, - "page_size3_L2": 1024*1024, - "assoc1_L2": 12, - "size1_L2": 1536,#1536, - "assoc2_L2": 32, #12, - "size2_L2": 32, #1536, - "assoc3_L2": 4, - "size3_L2": 16, - "clock": clock, - "levels": 2, - "max_width_L1": 3, - "max_outstanding_L1": 2, - "max_outstanding_PTWC": 2, - "latency_L1": 4, - "parallel_mode_L1": 1, - "max_outstanding_L2": 2, - "max_width_L2": 4, - "latency_L2": 10, - "parallel_mode_L2": 0, - "self_connected" : 0, - "page_walk_latency": 200, - "size1_PTWC": 32, # this just indicates the number entries of the page table walk cache level 1 (PTEs) - "assoc1_PTWC": 4, # this just indicates the associtativit the page table walk cache level 1 (PTEs) - "size2_PTWC": 32, # this just indicates the number entries of the page table walk cache level 2 (PMDs) - "assoc2_PTWC": 4, # this just indicates the associtativit the page table walk cache level 2 (PMDs) - "size3_PTWC": 32, # this just indicates the number entries of the page table walk cache level 3 (PUDs) - "assoc3_PTWC": 4, # this just indicates the associtativit the page table walk cache level 3 (PUDs) - "size4_PTWC": 32, # this just indicates the number entries of the page table walk cache level 4 (PGD) - "assoc4_PTWC": 4, # this just indicates the associtativit the page table walk cache level 4 (PGD) - "latency_PTWC": 10, # This is the latency of checking the page table walk cache - "opal_latency": "30ps", - "emulate_faults": 1, -} - - -opal= sst.Component("opal","Opal") -opal.addParams({ - "clock" : clock, - "num_nodes" : nodes, - "verbose" : 1, - "max_inst" : 32, - "shared_mempools" : 1, - "shared_mem.mempool0.start" : local_memory_capacity*1024*1024, - "shared_mem.mempool0.size" : shared_memory_capacity*1024, - "shared_mem.mempool0.frame_size": page_size, - "shared_mem.mempool0.mem_type" : 0, - "node0.cores" : cores//2, - "node0.allocation_policy" : 1, - "node0.latency" : 2000, - "node0.memory.start" : 0, - "node0.memory.size" : local_memory_capacity*1024, - "node0.memory.frame_size" : page_size, - "node0.memory.mem_type" : 0, - "node1.cores" : cores//2, - "node1.allocation_policy" : 1, - "node1.latency" : 2000, - "node1.memory.start" : 0, - "node1.memory.size" : local_memory_capacity*1024, - "node1.memory.frame_size" : page_size, - "node1.memory.mem_type" : 0, - "num_ports" : cores*nodes, -}) -opal.enableAllStatistics({"type":"sst.AccumulatorStatistic"}) - - -l1_params = { - "cache_frequency": clock, - "cache_size": "32KiB", - "associativity": 8, - "access_latency_cycles": 4, - "L1": 1, - "verbose": 30, - "maxRequestDelay" : "1000000", -} - -l2_params = { - "cache_frequency": clock, - "cache_size": "256KiB", - "associativity": 8, - "access_latency_cycles": 6, - "mshr_num_entries" : 16, -} - -l3_params = { - "access_latency_cycles" : "12", - "cache_frequency" : clock, - "associativity" : "16", - "cache_size" : "2MB", - "mshr_num_entries" : "4096", - "num_cache_slices" : 1, - "slice_allocation_policy" : "rr", -} - -link_params = { - "shared_memory": shared_memory, -# "node": 0, -} - -nic_params = { - "shared_memory": shared_memory, -# "node": 0, - "network_bw": "96GiB/s", - "local_memory_size" : local_memory_capacity*1024*1024, -} - - - -class Network: - def __init__(self, name,networkId,input_latency,output_latency): - self.name = name - self.ports = 0 - self.rtr = sst.Component("rtr_%s"%name, "merlin.hr_router") - self.rtr.addParams({ - "id": networkId, - #"topology": "merlin.singlerouter", - "link_bw" : "80GiB/s", - "xbar_bw" : "80GiB/s", - "flit_size" : "8B", - "input_latency" : input_latency, - "output_latency" : output_latency, - "input_buf_size" : "1KB", - "output_buf_size" : "1KB", - }) - - topo = self.rtr.setSubComponent("topology", "merlin.singlerouter") - - def getNextPort(self): - self.ports += 1 - self.rtr.addParam("num_ports", self.ports) - return (self.ports-1) - -internal_network_map = {} - -for node in range(nodes): - ariel = sst.Component("node"+str(node)+"_cpu", "ariel.ariel") - ariel.addParams(arielParams) - - # Opal uses this memory manager to intercept memory translation requests, mallocs, mmaps, etc. - memmgr = ariel.setSubComponent("memmgr", "Opal.MemoryManagerOpal") - memmgr.addParams({ - "opal_latency" : "30ps" - }) - # Opal uses this memory manager (for now?) to do the actual translation - submemmgr = memmgr.setSubComponent("translator", "ariel.MemoryManagerSimple") - submemmgr.addParams({ - "pagecount0" : num_pages, - "pagesize0" : page_size * 1024, - }) - - ariel.enableAllStatistics({"type":"sst.AccumulatorStatistic"}) - - # MMU - mmu = sst.Component("node"+str(node)+"_mmu", "Samba") - mmu.addParams(mmuParams) - - # MMU uses this page fault handler - pagefaulthandler = mmu.setSubComponent("pagefaulthandler", "Opal.PageFaultHandler") - pagefaulthandler.addParams({ - "opal_latency" : "30ps" - }) - - mmu.enableAllStatistics({"type":"sst.AccumulatorStatistic"}) - - internal_network = Network("node"+str(node)+"_internal_network",0,"20ps","20ps") - - for next_core in range(cores): - - l1 = sst.Component("node"+str(node)+"_l1cache_" + str(next_core), "memHierarchy.Cache") - l1.addParams(l1_params) - l1_cpulink = l1.setSubComponent("cpulink", "memHierarchy.MemLink") - l1_memlink = l1.setSubComponent("memlink", "memHierarchy.MemLink") - l1_cpulink.addParams(link_params) - l1_memlink.addParams(link_params) - l1_cpulink.addParams({"node": node,}) - l1_memlink.addParams({"node": node,}) - - l2 = sst.Component("node"+str(node)+"_l2cache_" + str(next_core), "memHierarchy.Cache") - l2.addParams(l2_params) - l2_cpulink = l2.setSubComponent("cpulink", "memHierarchy.MemLink") - l2_memlink = l2.setSubComponent("memlink", "Opal.OpalMemNIC") - l2_cpulink.addParams(link_params) - l2_memlink.addParams(nic_params) - l2_cpulink.addParams({ "node" : node}) - l2_memlink.addParams({ "node" : node}) - l2_memlink.addParams({ "group" : 1}) - - arielMMULink = sst.Link("node"+str(node)+"_cpu_mmu_link_" + str(next_core)) - MMUCacheLink = sst.Link("node"+str(node)+"_mmu_cache_link_" + str(next_core)) - PTWMemLink = sst.Link("node"+str(node)+"_ptw_mem_link_" + str(next_core)) - PTWOpalLink = sst.Link("node"+str(node)+"_ptw_opal_" + str(next_core)) - ArielOpalLink = sst.Link("node"+str(node)+"_ariel_opal_" + str(next_core)) - - if next_core < cores//2: - arielMMULink.connect((ariel, "cache_link_%d"%next_core, "300ps"), (mmu, "cpu_to_mmu%d"%next_core, "300ps")) - ArielOpalLink.connect((memmgr, "opal_link_%d"%next_core, "300ps"), (opal, "coreLink%d"%(next_core + node*(cores//2)), "300ps")) - MMUCacheLink.connect((mmu, "mmu_to_cache%d"%next_core, "300ps"), (l1_cpulink, "port", "300ps")) - PTWOpalLink.connect( (pagefaulthandler, "opal_link_%d"%next_core, "300ps"), (opal, "mmuLink%d"%(next_core + node*(cores//2)), "300ps") ) - else: - PTWMemLink.connect((mmu, "ptw_to_mem%d"%(next_core-cores//2), "300ps"), (l1_cpulink, "port", "300ps")) - - l2_core_link = sst.Link("node"+str(node)+"_l2cache_" + str(next_core) + "_link") - l2_core_link.connect((l1_memlink, "port", "300ps"), (l2_cpulink, "port", "300ps")) - - l2_ring_link = sst.Link("node"+str(node)+"_l2_ring_link_" + str(next_core)) - l2_ring_link.connect((l2_memlink, "port", "300ps"), (internal_network.rtr, "port%d"%(internal_network.getNextPort()), "300ps")) - - l3cache = sst.Component("node"+str(node)+"_l3cache", "memHierarchy.Cache") - l3cache.addParams(l3_params) - l3_link = l3cache.setSubComponent("cpulink", "Opal.OpalMemNIC") - l3cache.addParams({ "slice_id" : 0 }) - l3_link.addParams(nic_params) - l3_link.addParams({ - "node" : node, - "group" : 2, - "addr_range_start": 0, - "addr_range_end": (local_memory_capacity*1024*1024) - 1, - "interleave_size": "0B", - }) - - l3_ring_link = sst.Link("node"+str(node)+"_l3_link") - l3_ring_link.connect( (l3_link, "port", "300ps"), (internal_network.rtr, "port%d"%(internal_network.getNextPort()), "300ps")) - - - mem = sst.Component("node"+str(node)+"_local_memory", "memHierarchy.MemController") - mem.addParams({ - "clock" : "1.2GHz", - "backing" : "none", - "backend" : "memHierarchy.timingDRAM", - "backend.id" : 0, - "backend.addrMapper" : "memHierarchy.roundRobinAddrMapper", - "backend.addrMapper.interleave_size" : "64B", - "backend.addrMapper.row_size" : "1KiB", - "backend.clock" : "1.2GHz", - "backend.mem_size" : str(local_memory_capacity) + "MiB", - "backend.channels" : 2, - "backend.channel.numRanks" : 2, - "backend.channel.rank.numBanks" : 16, - "backend.channel.transaction_Q_size" : 32, - "backend.channel.rank.bank.CL" : 14, - "backend.channel.rank.bank.CL_WR" : 12, - "backend.channel.rank.bank.RCD" : 14, - "backend.channel.rank.bank.TRP" : 14, - "backend.channel.rank.bank.dataCycles" : 2, - "backend.channel.rank.bank.pagePolicy" : "memHierarchy.simplePagePolicy", - "backend.channel.rank.bank.transactionQ" : "memHierarchy.fifoTransactionQ", - "backend.channel.rank.bank.pagePolicy.close" : 1, - }) - mem_link = mem.setSubComponent("cpulink", "memHierarchy.MemLink") - mem_link.addParams({ - "shared_memory": 1, - "node" : 0 - }) - - dc = sst.Component("node"+str(node)+"_dc", "memHierarchy.DirectoryController") - dc.addParams({ - "entry_cache_size": 256*1024*1024, #Entry cache size of mem/blocksize - "clock": "200MHz", - #"debug" : 1, - #"debug_level" : 10, - }) - - dc_cpulink = dc.setSubComponent("cpulink", "Opal.OpalMemNIC") - dc_memlink = dc.setSubComponent("memlink", "memHierarchy.MemLink") - dc_memlink.addParams(link_params) - dc_cpulink.addParams(nic_params) - dc_cpulink.addParams({ - "node" : node, - "group" : 3, - "addr_range_start" : 0, - "addr_range_end" : (local_memory_capacity*1024*1024)-1, - "interleave_size": "0B", - #"debug" : 1, - #"debug_level" : 10, - }) - - memLink = sst.Link("node"+str(node)+"_mem_link") - memLink.connect((mem_link, "port", "300ps"), (dc_memlink, "port", "300ps")) - - netLink = sst.Link("node"+str(node)+"_dc_link") - netLink.connect((dc_cpulink, "port", "300ps"), (internal_network.rtr, "port%d"%(internal_network.getNextPort()), "300ps")) - - internal_network_map[str(node)] = internal_network - - - -# External memory configuration - -external_network = Network("Ext_Mem_Net",1,"20ns","20ns") -port = external_network.getNextPort() - -ext_mem = sst.Component("ExternalNVMmemContr", "memHierarchy.MemController") -ext_mem.addParams({ - "memory_size" : str(shared_memory_capacity) + "MB", - "max_requests_per_cycle" : 4, - "backing" : "none", - "clock" : clock, -}) - -ext_memory = ext_mem.setSubComponent("backend", "memHierarchy.Messier") -ext_memory.addParams({ - "max_requests_per_cycle" : 4, - "mem_size" : str(shared_memory_capacity) + "MB", - "clock" : clock, -}) - -ext_mem_link = ext_mem.setSubComponent("cpulink", "memHierarchy.MemLink") -ext_mem_link.addParams({ "node" : 9999, }) ## does not belong to any node - -ext_dc = sst.Component("ExtMemDc", "memHierarchy.DirectoryController") -ext_dc.addParams({ - "entry_cache_size": 256*1024*1024, #Entry cache size of mem/blocksize - "clock": "1GHz", -}) -ext_dc_cpulink = ext_dc.setSubComponent("cpulink", "Opal.OpalMemNIC") -ext_dc_memlink = ext_dc.setSubComponent("memlink", "memHierarchy.MemLink") -ext_dc_cpulink.addParams({ - "network_bw": "80GiB/s", - "addr_range_start" : (local_memory_capacity*1024*1024), - "addr_range_end" : (local_memory_capacity*1024*1024) + (shared_memory_capacity*1024*1024) -1, - "node": 9999, - "group" : 3, # TODO is this the right routing group? means sources are all components in group 2 and dests are all components in group 4 -}) - - -messier = sst.Component("ExternalMem" , "Messier") -messier.addParams({ - "clock" : clock, - "tCL" : 30, - "tRCD" : 300, - "tCL_W" : 1000, - "write_buffer_size" : 32, - "flush_th" : 90, - "num_banks" : 16, - "max_outstanding" : 16, - "max_writes" : "4", - "max_current_weight" : 32*50, - "read_weight" : "5", - "write_weight" : "5", - "cacheline_interleaving" : 0, -}) - -link_nvm_bus_link = sst.Link("External_mem_nvm_link") -link_nvm_bus_link.connect( (messier, "bus", "50ps"), (ext_memory, "nvm_link", "50ps") ) - -extmemLink = sst.Link("External_mem_dc_link") -extmemLink.connect( (ext_dc_memlink, "port", "500ps"), (ext_mem_link, "port", "500ps") ) - -ext_dcLink = sst.Link("External_mem_link") -ext_dcLink.connect( (ext_dc_cpulink, "port", "500ps"), (external_network.rtr, "port%d"%port, "500ps") ) - - - -# Connecting Internal and External network -def bridge(net0, net1): - net0port = net0.getNextPort() - net1port = net1.getNextPort() - name = "%s-%s"%(net0.name, net1.name) - bridge = sst.Component("Bridge:%s"%name, "merlin.Bridge") - bridge.addParams({ - "translator": "memHierarchy.MemNetBridge", - "network_bw" : "80GiB/s", - }) - link = sst.Link("B0-%s"%name) - link.connect( (bridge, "network0", "500ps"), (net0.rtr, "port%d"%net0port, "500ps") ) - link = sst.Link("B1-%s"%name) - link.connect( (bridge, "network1", "500ps"), (net1.rtr, "port%d"%net1port, "500ps") ) - - -for node in range(nodes): - midnet = Network("node"+str(node)+"_Bridge",3,"50ps","50ps") - bridge(internal_network_map[str(node)], midnet) - bridge(external_network, midnet) - - diff --git a/src/sst/elements/opal/tests/refFiles/test_Opal_basic_1node_1smp.out b/src/sst/elements/opal/tests/refFiles/test_Opal_basic_1node_1smp.out deleted file mode 100644 index c6238805ac..0000000000 --- a/src/sst/elements/opal/tests/refFiles/test_Opal_basic_1node_1smp.out +++ /dev/null @@ -1,125 +0,0 @@ -ArielComponent[arielcpu.cc:38:ArielCPU] Creating Ariel component... -ArielComponent[arielcpu.cc:44:ArielCPU] Configuring for 2 cores... -ArielComponent[arielcpu.cc:47:ArielCPU] Configuring for check addresses = no -ArielComponent[arielcpu.cc:120:ArielCPU] Loaded memory manager: cpu:memmgr -ArielComponent[arielcpu.cc:134:ArielCPU] Memory manager construction is completed. -Pin2Frontend[frontend/simple/pin2frontend.cc:75:Pin2Frontend] Model specifies that there are 0 application arguments -Pin2Frontend[frontend/simple/pin2frontend.cc:82:Pin2Frontend] Interception and instrumentation of multi-level memory and malloc/free calls is DISABLED. -Pin2Frontend[frontend/simple/pin2frontend.cc:90:Pin2Frontend] Tracking the stack and dumping on malloc calls is DISABLED. -Pin2Frontend[frontend/simple/pin2frontend.cc:95:Pin2Frontend] Malloc map file is DISABLED -Pin2Frontend[frontend/simple/pin2frontend.cc:104:Pin2Frontend] Base pipe name: /sst_shmem_57403-0-1681692777 -Pin2Frontend[frontend/simple/pin2frontend.cc:128:Pin2Frontend] Processing application arguments... -Pin2Frontend[frontend/simple/pin2frontend.cc:258:Pin2Frontend] Completed processing application arguments. -Pin2Frontend[frontend/simple/pin2frontend.cc:263:Pin2Frontend] Completed initialization of the Ariel CPU. -ArielComponent[arielcpu.cc:170:ArielCPU] Registering ArielCPU clock at 2GHz -ArielComponent[arielcpu.cc:174:ArielCPU] Clocks registered. -ArielComponent[arielcpu.cc:176:ArielCPU] Creating core to cache links... -ArielComponent[arielcpu.cc:178:ArielCPU] Creating processor cores and cache links... -ArielComponent[arielcpu.cc:180:ArielCPU] Configuring cores and cache links... -ArielComponent[arielcpu.cc:239:ArielCPU] Completed initialization of the Ariel CPU. -Initialized with 2 cores -Before initialization -Assigning the PTW correctly -mmu:pagefaulthandler register handler -Assigning the PTW correctly -mmu:pagefaulthandler register handler -After initialization -l2cache_0: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -l2cache_1: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -l2cache_2: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -l2cache_3: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -l3cache: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 3 cycles. -local_memory, WARNING: loading backend in legacy mode (from parameter set). Instead, load backend into this controller's 'backend' slot via ctrl.setSubComponent() in configuration. -0:TimingDRAM::build():57:mc=0: number of channels: 2 -0:TimingDRAM::build():58:mc=0: address mapper: memHierarchy.roundRobinAddrMapper -0:TimingDRAM:Channel:Channel():116:mc=0:chan=0: max pending trans: 32 -0:TimingDRAM:Channel:Channel():117:mc=0:chan=0: number of ranks: 2 -0:TimingDRAM:Rank:Rank():226:mc=0:chan=0:rank=0: number of banks: 16 -0:TimingDRAM:Bank:Bank():294:mc=0:chan=0:rank=0:bank=0: CL: 14 -0:TimingDRAM:Bank:Bank():295:mc=0:chan=0:rank=0:bank=0: CL_WR: 12 -0:TimingDRAM:Bank:Bank():296:mc=0:chan=0:rank=0:bank=0: RCD: 14 -0:TimingDRAM:Bank:Bank():297:mc=0:chan=0:rank=0:bank=0: TRP: 14 -0:TimingDRAM:Bank:Bank():298:mc=0:chan=0:rank=0:bank=0: dataCycles: 2 -0:TimingDRAM:Bank:Bank():299:mc=0:chan=0:rank=0:bank=0: transactionQ: memHierarchy.fifoTransactionQ -0:TimingDRAM:Bank:Bank():300:mc=0:chan=0:rank=0:bank=0: pagePolicy: memHierarchy.simplePagePolicy -dc, Warning: getting region parameters (addr_range_start/end, interleave_step/size) from link subcomponent. In the future this will not be supported and region parameters should be declared in the directory's parameters instead. -ExtMemDc, Warning: getting region parameters (addr_range_start/end, interleave_step/size) from link subcomponent. In the future this will not be supported and region parameters should be declared in the directory's parameters instead. -The value of tRCD is 300 -After initialization -SSTARIEL: Loading Ariel Tool to connect to SST on pipe: /sst_shmem_57403-0-1681692777 max core count: 2 -SSTARIEL: Function profiling is disabled. -Pin2Frontend[frontend/simple/pin2frontend.cc:270:init] Launching PIN... -Pin2Frontend[frontend/simple/pin2frontend.cc:326:forkPINChild] Executing PIN command: /home/vamsee/SST_PORTED/pin-2.14-71313-gcc.4.4.7-linux/pin.sh -follow_execv -ifeellucky -t /home/vamsee/forked/BUILD_ELEMENTS/libexec/fesimple.so -w 0 -E 1 -p /sst_shmem_57403-0-1681692777 -v 1 -t 0 -c 2 -s 0 -m 0 -k 0 -d 0 -- ./app/opal_test -Pin2Frontend[frontend/simple/pin2frontend.cc:275:init] Returned from launching PIN. Waiting for child to attach. -Pin2Frontend[frontend/simple/pin2frontend.cc:278:init] Child has attached! -ArielComponent[arielcpu.cc:257:finish] Ariel Processor Information: -ArielComponent[arielcpu.cc:258:finish] Completed at: 1080738 nanoseconds. -ArielComponent[arielcpu.cc:259:finish] Ariel Component Statistics (By Core) - -Ariel Memory Management Statistics: ---------------------------------------------------------------------- -Page Table Sizes: -- Map entries 38 -Page Table Coverages: -- Bytes 155648 - cpu.read_requests.0 : Accumulator : Sum.u64 = 2865; SumSQ.u64 = 2865; Count.u64 = 2865; Min.u64 = 1; Max.u64 = 1; - cpu.write_requests.0 : Accumulator : Sum.u64 = 1337; SumSQ.u64 = 1337; Count.u64 = 1337; Min.u64 = 1; Max.u64 = 1; - cpu.read_request_sizes.0 : Accumulator : Sum.u64 = 16664; SumSQ.u64 = 124680; Count.u64 = 2865; Min.u64 = 1; Max.u64 = 64; - cpu.write_request_sizes.0 : Accumulator : Sum.u64 = 9662; SumSQ.u64 = 83528; Count.u64 = 1337; Min.u64 = 1; Max.u64 = 64; - cpu.split_read_requests.0 : Accumulator : Sum.u64 = 2; SumSQ.u64 = 2; Count.u64 = 2; Min.u64 = 1; Max.u64 = 1; - cpu.split_write_requests.0 : Accumulator : Sum.u64 = 1; SumSQ.u64 = 1; Count.u64 = 1; Min.u64 = 1; Max.u64 = 1; - cpu.flush_requests.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fence_requests.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.no_ops.0 : Accumulator : Sum.u64 = 5798; SumSQ.u64 = 5798; Count.u64 = 5798; Min.u64 = 1; Max.u64 = 1; - cpu.instruction_count.0 : Accumulator : Sum.u64 = 10000; SumSQ.u64 = 10000; Count.u64 = 10000; Min.u64 = 1; Max.u64 = 1; - cpu.cycles.0 : Accumulator : Sum.u64 = 2161476; SumSQ.u64 = 2161476; Count.u64 = 2161476; Min.u64 = 1; Max.u64 = 1; - cpu.active_cycles.0 : Accumulator : Sum.u64 = 13377; SumSQ.u64 = 13377; Count.u64 = 13377; Min.u64 = 1; Max.u64 = 1; - cpu.fp_sp_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_dp_ins.0 : Accumulator : Sum.u64 = 122; SumSQ.u64 = 122; Count.u64 = 122; Min.u64 = 1; Max.u64 = 1; - cpu.fp_sp_simd_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_dp_simd_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_sp_scalar_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_dp_scalar_ins.0 : Accumulator : Sum.u64 = 122; SumSQ.u64 = 122; Count.u64 = 122; Min.u64 = 1; Max.u64 = 1; - cpu.fp_sp_ops.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_dp_ops.0 : Accumulator : Sum.u64 = 122; SumSQ.u64 = 122; Count.u64 = 122; Min.u64 = 1; Max.u64 = 1; - cpu.read_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.write_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.read_request_sizes.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.write_request_sizes.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.split_read_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.split_write_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.flush_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fence_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.no_ops.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.instruction_count.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.cycles.1 : Accumulator : Sum.u64 = 2161475; SumSQ.u64 = 2161475; Count.u64 = 2161475; Min.u64 = 1; Max.u64 = 1; - cpu.active_cycles.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_sp_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_dp_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_sp_simd_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_dp_simd_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_sp_scalar_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_dp_scalar_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_sp_ops.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - cpu.fp_dp_ops.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_hits.Core0_PTWC : Accumulator : Sum.u64 = 8; SumSQ.u64 = 8; Count.u64 = 8; Min.u64 = 1; Max.u64 = 1; - mmu.tlb_misses.Core0_PTWC : Accumulator : Sum.u64 = 42; SumSQ.u64 = 42; Count.u64 = 42; Min.u64 = 1; Max.u64 = 1; - mmu.total_waiting.0 : Accumulator : Sum.u64 = 56937; SumSQ.u64 = 40552311; Count.u64 = 4205; Min.u64 = 1; Max.u64 = 2170; - mmu.tlb_hits.Core0_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_misses.Core0_L2 : Accumulator : Sum.u64 = 50; SumSQ.u64 = 50; Count.u64 = 50; Min.u64 = 1; Max.u64 = 1; - mmu.tlb_shootdown.Core0_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_hits.Core0_L1 : Accumulator : Sum.u64 = 4095; SumSQ.u64 = 4095; Count.u64 = 4095; Min.u64 = 1; Max.u64 = 1; - mmu.tlb_misses.Core0_L1 : Accumulator : Sum.u64 = 110; SumSQ.u64 = 110; Count.u64 = 110; Min.u64 = 1; Max.u64 = 1; - mmu.tlb_shootdown.Core0_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_hits.Core1_PTWC : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_misses.Core1_PTWC : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.total_waiting.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_hits.Core1_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_misses.Core1_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_shootdown.Core1_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_hits.Core1_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_misses.Core1_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - mmu.tlb_shootdown.Core1_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - opal.local_mem_usage.0 : Accumulator : Sum.u64 = 28; SumSQ.u64 = 28; Count.u64 = 28; Min.u64 = 1; Max.u64 = 1; - opal.shared_mem_usage.0 : Accumulator : Sum.u64 = 27; SumSQ.u64 = 27; Count.u64 = 27; Min.u64 = 1; Max.u64 = 1; -Simulation is complete, simulated time: 1.08074 ms diff --git a/src/sst/elements/opal/tests/refFiles/test_Opal_basic_2node_1smp.out b/src/sst/elements/opal/tests/refFiles/test_Opal_basic_2node_1smp.out deleted file mode 100644 index 72626d7588..0000000000 --- a/src/sst/elements/opal/tests/refFiles/test_Opal_basic_2node_1smp.out +++ /dev/null @@ -1,234 +0,0 @@ -ArielComponent[arielcpu.cc:38:ArielCPU] Creating Ariel component... -ArielComponent[arielcpu.cc:44:ArielCPU] Configuring for 2 cores... -ArielComponent[arielcpu.cc:47:ArielCPU] Configuring for check addresses = no -ArielComponent[arielcpu.cc:120:ArielCPU] Loaded memory manager: node0_cpu:memmgr -ArielComponent[arielcpu.cc:134:ArielCPU] Memory manager construction is completed. -Pin2Frontend[frontend/simple/pin2frontend.cc:75:Pin2Frontend] Model specifies that there are 0 application arguments -Pin2Frontend[frontend/simple/pin2frontend.cc:82:Pin2Frontend] Interception and instrumentation of multi-level memory and malloc/free calls is DISABLED. -Pin2Frontend[frontend/simple/pin2frontend.cc:90:Pin2Frontend] Tracking the stack and dumping on malloc calls is DISABLED. -Pin2Frontend[frontend/simple/pin2frontend.cc:95:Pin2Frontend] Malloc map file is DISABLED -Pin2Frontend[frontend/simple/pin2frontend.cc:104:Pin2Frontend] Base pipe name: /sst_shmem_57429-1-1681692777 -Pin2Frontend[frontend/simple/pin2frontend.cc:128:Pin2Frontend] Processing application arguments... -Pin2Frontend[frontend/simple/pin2frontend.cc:258:Pin2Frontend] Completed processing application arguments. -Pin2Frontend[frontend/simple/pin2frontend.cc:263:Pin2Frontend] Completed initialization of the Ariel CPU. -ArielComponent[arielcpu.cc:170:ArielCPU] Registering ArielCPU clock at 2GHz -ArielComponent[arielcpu.cc:174:ArielCPU] Clocks registered. -ArielComponent[arielcpu.cc:176:ArielCPU] Creating core to cache links... -ArielComponent[arielcpu.cc:178:ArielCPU] Creating processor cores and cache links... -ArielComponent[arielcpu.cc:180:ArielCPU] Configuring cores and cache links... -ArielComponent[arielcpu.cc:239:ArielCPU] Completed initialization of the Ariel CPU. -Initialized with 2 cores -Before initialization -Assigning the PTW correctly -node0_mmu:pagefaulthandler register handler -Assigning the PTW correctly -node0_mmu:pagefaulthandler register handler -After initialization -node0_l2cache_0: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -node0_l2cache_1: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -node0_l2cache_2: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -node0_l2cache_3: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -node0_l3cache: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 3 cycles. -node0_local_memory, WARNING: loading backend in legacy mode (from parameter set). Instead, load backend into this controller's 'backend' slot via ctrl.setSubComponent() in configuration. -0:TimingDRAM::build():57:mc=0: number of channels: 2 -0:TimingDRAM::build():58:mc=0: address mapper: memHierarchy.roundRobinAddrMapper -0:TimingDRAM:Channel:Channel():116:mc=0:chan=0: max pending trans: 32 -0:TimingDRAM:Channel:Channel():117:mc=0:chan=0: number of ranks: 2 -0:TimingDRAM:Rank:Rank():226:mc=0:chan=0:rank=0: number of banks: 16 -0:TimingDRAM:Bank:Bank():294:mc=0:chan=0:rank=0:bank=0: CL: 14 -0:TimingDRAM:Bank:Bank():295:mc=0:chan=0:rank=0:bank=0: CL_WR: 12 -0:TimingDRAM:Bank:Bank():296:mc=0:chan=0:rank=0:bank=0: RCD: 14 -0:TimingDRAM:Bank:Bank():297:mc=0:chan=0:rank=0:bank=0: TRP: 14 -0:TimingDRAM:Bank:Bank():298:mc=0:chan=0:rank=0:bank=0: dataCycles: 2 -0:TimingDRAM:Bank:Bank():299:mc=0:chan=0:rank=0:bank=0: transactionQ: memHierarchy.fifoTransactionQ -0:TimingDRAM:Bank:Bank():300:mc=0:chan=0:rank=0:bank=0: pagePolicy: memHierarchy.simplePagePolicy -node0_dc, Warning: getting region parameters (addr_range_start/end, interleave_step/size) from link subcomponent. In the future this will not be supported and region parameters should be declared in the directory's parameters instead. -ArielComponent[arielcpu.cc:38:ArielCPU] Creating Ariel component... -ArielComponent[arielcpu.cc:44:ArielCPU] Configuring for 2 cores... -ArielComponent[arielcpu.cc:47:ArielCPU] Configuring for check addresses = no -ArielComponent[arielcpu.cc:120:ArielCPU] Loaded memory manager: node1_cpu:memmgr -ArielComponent[arielcpu.cc:134:ArielCPU] Memory manager construction is completed. -Pin2Frontend[frontend/simple/pin2frontend.cc:75:Pin2Frontend] Model specifies that there are 0 application arguments -Pin2Frontend[frontend/simple/pin2frontend.cc:82:Pin2Frontend] Interception and instrumentation of multi-level memory and malloc/free calls is DISABLED. -Pin2Frontend[frontend/simple/pin2frontend.cc:90:Pin2Frontend] Tracking the stack and dumping on malloc calls is DISABLED. -Pin2Frontend[frontend/simple/pin2frontend.cc:95:Pin2Frontend] Malloc map file is DISABLED -Pin2Frontend[frontend/simple/pin2frontend.cc:104:Pin2Frontend] Base pipe name: /sst_shmem_57429-15-1714636915 -Pin2Frontend[frontend/simple/pin2frontend.cc:128:Pin2Frontend] Processing application arguments... -Pin2Frontend[frontend/simple/pin2frontend.cc:258:Pin2Frontend] Completed processing application arguments. -Pin2Frontend[frontend/simple/pin2frontend.cc:263:Pin2Frontend] Completed initialization of the Ariel CPU. -ArielComponent[arielcpu.cc:170:ArielCPU] Registering ArielCPU clock at 2GHz -ArielComponent[arielcpu.cc:174:ArielCPU] Clocks registered. -ArielComponent[arielcpu.cc:176:ArielCPU] Creating core to cache links... -ArielComponent[arielcpu.cc:178:ArielCPU] Creating processor cores and cache links... -ArielComponent[arielcpu.cc:180:ArielCPU] Configuring cores and cache links... -ArielComponent[arielcpu.cc:239:ArielCPU] Completed initialization of the Ariel CPU. -Initialized with 2 cores -Before initialization -Assigning the PTW correctly -node1_mmu:pagefaulthandler register handler -Assigning the PTW correctly -node1_mmu:pagefaulthandler register handler -After initialization -node1_l2cache_0: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -node1_l2cache_1: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -node1_l2cache_2: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -node1_l2cache_3: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 2 cycles. -node1_l3cache: No MSHR lookup latency provided (mshr_latency_cycles)...intrapolated to 3 cycles. -node1_local_memory, WARNING: loading backend in legacy mode (from parameter set). Instead, load backend into this controller's 'backend' slot via ctrl.setSubComponent() in configuration. -node1_dc, Warning: getting region parameters (addr_range_start/end, interleave_step/size) from link subcomponent. In the future this will not be supported and region parameters should be declared in the directory's parameters instead. -ExtMemDc, Warning: getting region parameters (addr_range_start/end, interleave_step/size) from link subcomponent. In the future this will not be supported and region parameters should be declared in the directory's parameters instead. -The value of tRCD is 300 -After initialization -SSTARIEL: Loading Ariel Tool to connect to SST on pipe: /sst_shmem_57429-1-1681692777 max core count: 2 -SSTARIEL: Function profiling is disabled. -SSTARIEL: Loading Ariel Tool to connect to SST on pipe: /sst_shmem_57429-15-1714636915 max core count: 2 -SSTARIEL: Function profiling is disabled. -Pin2Frontend[frontend/simple/pin2frontend.cc:270:init] Launching PIN... -Pin2Frontend[frontend/simple/pin2frontend.cc:326:forkPINChild] Executing PIN command: /home/vamsee/SST_PORTED/pin-2.14-71313-gcc.4.4.7-linux/pin.sh -follow_execv -ifeellucky -t /home/vamsee/forked/BUILD_ELEMENTS/libexec/fesimple.so -w 0 -E 1 -p /sst_shmem_57429-1-1681692777 -v 1 -t 0 -c 2 -s 0 -m 0 -k 0 -d 0 -- ./app/opal_test -Pin2Frontend[frontend/simple/pin2frontend.cc:275:init] Returned from launching PIN. Waiting for child to attach. -Pin2Frontend[frontend/simple/pin2frontend.cc:278:init] Child has attached! -Pin2Frontend[frontend/simple/pin2frontend.cc:270:init] Launching PIN... -Pin2Frontend[frontend/simple/pin2frontend.cc:326:forkPINChild] Executing PIN command: /home/vamsee/SST_PORTED/pin-2.14-71313-gcc.4.4.7-linux/pin.sh -follow_execv -ifeellucky -t /home/vamsee/forked/BUILD_ELEMENTS/libexec/fesimple.so -w 0 -E 1 -p /sst_shmem_57429-15-1714636915 -v 1 -t 0 -c 2 -s 0 -m 0 -k 0 -d 0 -- ./app/opal_test -Pin2Frontend[frontend/simple/pin2frontend.cc:275:init] Returned from launching PIN. Waiting for child to attach. -Pin2Frontend[frontend/simple/pin2frontend.cc:278:init] Child has attached! -ArielComponent[arielcpu.cc:257:finish] Ariel Processor Information: -ArielComponent[arielcpu.cc:258:finish] Completed at: 657421 nanoseconds. -ArielComponent[arielcpu.cc:259:finish] Ariel Component Statistics (By Core) - -Ariel Memory Management Statistics: ---------------------------------------------------------------------- -Page Table Sizes: -- Map entries 37 -Page Table Coverages: -- Bytes 151552 -ArielComponent[arielcpu.cc:257:finish] Ariel Processor Information: -ArielComponent[arielcpu.cc:258:finish] Completed at: 657421 nanoseconds. -ArielComponent[arielcpu.cc:259:finish] Ariel Component Statistics (By Core) - -Ariel Memory Management Statistics: ---------------------------------------------------------------------- -Page Table Sizes: -- Map entries 37 -Page Table Coverages: -- Bytes 151552 - opal.local_mem_usage.0 : Accumulator : Sum.u64 = 28; SumSQ.u64 = 28; Count.u64 = 28; Min.u64 = 1; Max.u64 = 1; - opal.shared_mem_usage.0 : Accumulator : Sum.u64 = 27; SumSQ.u64 = 27; Count.u64 = 27; Min.u64 = 1; Max.u64 = 1; - opal.local_mem_usage.1 : Accumulator : Sum.u64 = 27; SumSQ.u64 = 27; Count.u64 = 27; Min.u64 = 1; Max.u64 = 1; - opal.shared_mem_usage.1 : Accumulator : Sum.u64 = 26; SumSQ.u64 = 26; Count.u64 = 26; Min.u64 = 1; Max.u64 = 1; - node0_cpu.read_requests.0 : Accumulator : Sum.u64 = 2865; SumSQ.u64 = 2865; Count.u64 = 2865; Min.u64 = 1; Max.u64 = 1; - node0_cpu.write_requests.0 : Accumulator : Sum.u64 = 1338; SumSQ.u64 = 1338; Count.u64 = 1338; Min.u64 = 1; Max.u64 = 1; - node0_cpu.read_request_sizes.0 : Accumulator : Sum.u64 = 16664; SumSQ.u64 = 124680; Count.u64 = 2865; Min.u64 = 1; Max.u64 = 64; - node0_cpu.write_request_sizes.0 : Accumulator : Sum.u64 = 9670; SumSQ.u64 = 83592; Count.u64 = 1338; Min.u64 = 1; Max.u64 = 64; - node0_cpu.split_read_requests.0 : Accumulator : Sum.u64 = 2; SumSQ.u64 = 2; Count.u64 = 2; Min.u64 = 1; Max.u64 = 1; - node0_cpu.split_write_requests.0 : Accumulator : Sum.u64 = 1; SumSQ.u64 = 1; Count.u64 = 1; Min.u64 = 1; Max.u64 = 1; - node0_cpu.flush_requests.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fence_requests.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.no_ops.0 : Accumulator : Sum.u64 = 5798; SumSQ.u64 = 5798; Count.u64 = 5798; Min.u64 = 1; Max.u64 = 1; - node0_cpu.instruction_count.0 : Accumulator : Sum.u64 = 10001; SumSQ.u64 = 10001; Count.u64 = 10001; Min.u64 = 1; Max.u64 = 1; - node0_cpu.cycles.0 : Accumulator : Sum.u64 = 60312; SumSQ.u64 = 60312; Count.u64 = 60312; Min.u64 = 1; Max.u64 = 1; - node0_cpu.active_cycles.0 : Accumulator : Sum.u64 = 13432; SumSQ.u64 = 13432; Count.u64 = 13432; Min.u64 = 1; Max.u64 = 1; - node0_cpu.fp_sp_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_dp_ins.0 : Accumulator : Sum.u64 = 127; SumSQ.u64 = 127; Count.u64 = 127; Min.u64 = 1; Max.u64 = 1; - node0_cpu.fp_sp_simd_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_dp_simd_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_sp_scalar_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_dp_scalar_ins.0 : Accumulator : Sum.u64 = 127; SumSQ.u64 = 127; Count.u64 = 127; Min.u64 = 1; Max.u64 = 1; - node0_cpu.fp_sp_ops.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_dp_ops.0 : Accumulator : Sum.u64 = 127; SumSQ.u64 = 127; Count.u64 = 127; Min.u64 = 1; Max.u64 = 1; - node0_cpu.read_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.write_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.read_request_sizes.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.write_request_sizes.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.split_read_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.split_write_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.flush_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fence_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.no_ops.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.instruction_count.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.cycles.1 : Accumulator : Sum.u64 = 60311; SumSQ.u64 = 60311; Count.u64 = 60311; Min.u64 = 1; Max.u64 = 1; - node0_cpu.active_cycles.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_sp_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_dp_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_sp_simd_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_dp_simd_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_sp_scalar_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_dp_scalar_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_sp_ops.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_cpu.fp_dp_ops.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_hits.Core0_PTWC : Accumulator : Sum.u64 = 7; SumSQ.u64 = 7; Count.u64 = 7; Min.u64 = 1; Max.u64 = 1; - node0_mmu.tlb_misses.Core0_PTWC : Accumulator : Sum.u64 = 40; SumSQ.u64 = 40; Count.u64 = 40; Min.u64 = 1; Max.u64 = 1; - node0_mmu.total_waiting.0 : Accumulator : Sum.u64 = 66211; SumSQ.u64 = 52679535; Count.u64 = 4206; Min.u64 = 1; Max.u64 = 2224; - node0_mmu.tlb_hits.Core0_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_misses.Core0_L2 : Accumulator : Sum.u64 = 47; SumSQ.u64 = 47; Count.u64 = 47; Min.u64 = 1; Max.u64 = 1; - node0_mmu.tlb_shootdown.Core0_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_hits.Core0_L1 : Accumulator : Sum.u64 = 4107; SumSQ.u64 = 4107; Count.u64 = 4107; Min.u64 = 1; Max.u64 = 1; - node0_mmu.tlb_misses.Core0_L1 : Accumulator : Sum.u64 = 99; SumSQ.u64 = 99; Count.u64 = 99; Min.u64 = 1; Max.u64 = 1; - node0_mmu.tlb_shootdown.Core0_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_hits.Core1_PTWC : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_misses.Core1_PTWC : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.total_waiting.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_hits.Core1_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_misses.Core1_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_shootdown.Core1_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_hits.Core1_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_misses.Core1_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node0_mmu.tlb_shootdown.Core1_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.read_requests.0 : Accumulator : Sum.u64 = 2865; SumSQ.u64 = 2865; Count.u64 = 2865; Min.u64 = 1; Max.u64 = 1; - node1_cpu.write_requests.0 : Accumulator : Sum.u64 = 1338; SumSQ.u64 = 1338; Count.u64 = 1338; Min.u64 = 1; Max.u64 = 1; - node1_cpu.read_request_sizes.0 : Accumulator : Sum.u64 = 16664; SumSQ.u64 = 124680; Count.u64 = 2865; Min.u64 = 1; Max.u64 = 64; - node1_cpu.write_request_sizes.0 : Accumulator : Sum.u64 = 9670; SumSQ.u64 = 83592; Count.u64 = 1338; Min.u64 = 1; Max.u64 = 64; - node1_cpu.split_read_requests.0 : Accumulator : Sum.u64 = 2; SumSQ.u64 = 2; Count.u64 = 2; Min.u64 = 1; Max.u64 = 1; - node1_cpu.split_write_requests.0 : Accumulator : Sum.u64 = 1; SumSQ.u64 = 1; Count.u64 = 1; Min.u64 = 1; Max.u64 = 1; - node1_cpu.flush_requests.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fence_requests.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.no_ops.0 : Accumulator : Sum.u64 = 5798; SumSQ.u64 = 5798; Count.u64 = 5798; Min.u64 = 1; Max.u64 = 1; - node1_cpu.instruction_count.0 : Accumulator : Sum.u64 = 10001; SumSQ.u64 = 10001; Count.u64 = 10001; Min.u64 = 1; Max.u64 = 1; - node1_cpu.cycles.0 : Accumulator : Sum.u64 = 1314843; SumSQ.u64 = 1314843; Count.u64 = 1314843; Min.u64 = 1; Max.u64 = 1; - node1_cpu.active_cycles.0 : Accumulator : Sum.u64 = 12494; SumSQ.u64 = 12494; Count.u64 = 12494; Min.u64 = 1; Max.u64 = 1; - node1_cpu.fp_sp_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_dp_ins.0 : Accumulator : Sum.u64 = 122; SumSQ.u64 = 122; Count.u64 = 122; Min.u64 = 1; Max.u64 = 1; - node1_cpu.fp_sp_simd_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_dp_simd_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_sp_scalar_ins.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_dp_scalar_ins.0 : Accumulator : Sum.u64 = 122; SumSQ.u64 = 122; Count.u64 = 122; Min.u64 = 1; Max.u64 = 1; - node1_cpu.fp_sp_ops.0 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_dp_ops.0 : Accumulator : Sum.u64 = 122; SumSQ.u64 = 122; Count.u64 = 122; Min.u64 = 1; Max.u64 = 1; - node1_cpu.read_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.write_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.read_request_sizes.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.write_request_sizes.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.split_read_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.split_write_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.flush_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fence_requests.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.no_ops.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.instruction_count.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.cycles.1 : Accumulator : Sum.u64 = 1314842; SumSQ.u64 = 1314842; Count.u64 = 1314842; Min.u64 = 1; Max.u64 = 1; - node1_cpu.active_cycles.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_sp_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_dp_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_sp_simd_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_dp_simd_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_sp_scalar_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_dp_scalar_ins.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_sp_ops.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_cpu.fp_dp_ops.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_hits.Core0_PTWC : Accumulator : Sum.u64 = 7; SumSQ.u64 = 7; Count.u64 = 7; Min.u64 = 1; Max.u64 = 1; - node1_mmu.tlb_misses.Core0_PTWC : Accumulator : Sum.u64 = 45; SumSQ.u64 = 45; Count.u64 = 45; Min.u64 = 1; Max.u64 = 1; - node1_mmu.total_waiting.0 : Accumulator : Sum.u64 = 57913; SumSQ.u64 = 42467155; Count.u64 = 4205; Min.u64 = 1; Max.u64 = 2087; - node1_mmu.tlb_hits.Core0_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_misses.Core0_L2 : Accumulator : Sum.u64 = 52; SumSQ.u64 = 52; Count.u64 = 52; Min.u64 = 1; Max.u64 = 1; - node1_mmu.tlb_shootdown.Core0_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_hits.Core0_L1 : Accumulator : Sum.u64 = 4111; SumSQ.u64 = 4111; Count.u64 = 4111; Min.u64 = 1; Max.u64 = 1; - node1_mmu.tlb_misses.Core0_L1 : Accumulator : Sum.u64 = 94; SumSQ.u64 = 94; Count.u64 = 94; Min.u64 = 1; Max.u64 = 1; - node1_mmu.tlb_shootdown.Core0_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_hits.Core1_PTWC : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_misses.Core1_PTWC : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.total_waiting.1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_hits.Core1_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_misses.Core1_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_shootdown.Core1_L2 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_hits.Core1_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_misses.Core1_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; - node1_mmu.tlb_shootdown.Core1_L1 : Accumulator : Sum.u64 = 0; SumSQ.u64 = 0; Count.u64 = 0; Min.u64 = 0; Max.u64 = 0; -Simulation is complete, simulated time: 657.422 us diff --git a/src/sst/elements/serrano/Makefile.am b/src/sst/elements/serrano/Makefile.am deleted file mode 100644 index 107e11b29c..0000000000 --- a/src/sst/elements/serrano/Makefile.am +++ /dev/null @@ -1,29 +0,0 @@ -# -*- Makefile -*- -# -# - -AM_CPPFLAGS += \ - $(MPI_CPPFLAGS) - -compdir = $(pkglibdir) -comp_LTLIBRARIES = libserrano.la -libserrano_la_SOURCES = \ - scircq.h \ - sercgunit.h \ - seriterunit.h \ - serprintunit.h \ - serrano.cc \ - serrano.h \ - serstdunit.h \ - smsg.h - -EXTRA_DIST = \ - tests/test_serrano.py \ - tests/graphs/sum.graph - -libserrano_la_LDFLAGS = -module -avoid-version - -install-exec-hook: - $(SST_REGISTER_TOOL) SST_ELEMENT_SOURCE serrano=$(abs_srcdir) - $(SST_REGISTER_TOOL) SST_ELEMENT_TESTS serrano=$(abs_srcdir)/tests - diff --git a/src/sst/elements/serrano/scircq.h b/src/sst/elements/serrano/scircq.h deleted file mode 100644 index 581965a7af..0000000000 --- a/src/sst/elements/serrano/scircq.h +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright 2009-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _H_SERRANO_CIRC_Q -#define _H_SERRANO_CIRC_Q - -#include - -namespace SST { -namespace Serrano { - -template -class SerranoCircularQueue { -public: - SerranoCircularQueue( const size_t size ) : - max_capacity(size) { - - front = 0; - back = 0; - - count = 0; - data = new T[size]; - } - - ~SerranoCircularQueue() { - delete[] data; - } - - bool empty() { - return (front == back); - } - - bool full() { - return ( safe_inc(back) == front ); - } - - void push(T item) { - data[back] = item; - back = safe_inc(back); - count++; - } - - T peek() { - return data[front]; - } - - T peek( const size_t index ) { - return data[ (front+index) % max_capacity ]; - } - - T pop() { - T temp = data[front]; - front = safe_inc(front); - count--; - return temp; - } - - size_t size() const { - return count; - } - - size_t capacity() const { - return max_capacity; - } - - void clear() { - front = 0; - back = 0; - } - -private: - size_t safe_inc(size_t v) { - return (v+1) % max_capacity; - } - - size_t front; - size_t back; - size_t count; - const size_t max_capacity; - T* data; - -}; - -} -} - -#endif diff --git a/src/sst/elements/serrano/sercgunit.h b/src/sst/elements/serrano/sercgunit.h deleted file mode 100644 index c09588f0f6..0000000000 --- a/src/sst/elements/serrano/sercgunit.h +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2009-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. -\ -#ifndef _H_SERRANO_COARSE_UNIT -#define _H_SERRANO_COARSE_UNIT - -#include - -#include -#include -#include - -#include "scircq.h" -#include "smsg.h" - -namespace SST { -namespace Serrano { - -enum SerranoStandardType { - TYPE_INT32, - TYPE_INT64, - TYPE_FP32, - TYPE_FP64, - TYPE_CUSTOM -}; - -class SerranoCoarseUnit : public SST::SubComponent { - -public: - SST_ELI_REGISTER_SUBCOMPONENT_API( SST::Serrano::SerranoCoarseUnit ) - - SerranoCoarseUnit( SST::ComponentId_t id, Params& params ) : - SubComponent(id) { - - int verbosity = params.find("verbose", 0); - char* comp_name = new char[64]; - snprintf(comp_name, 64, "[cgra]: "); - - output = new SST::Output(comp_name, verbosity, 0, Output::STDOUT ); - } - - ~SerranoCoarseUnit() { - delete output; - } - - virtual bool stillProcessing() = 0; - virtual void execute( const uint64_t current_cycle ) = 0; - - void addInputQueue( SerranoCircularQueue* new_q ) { - output->verbose(CALL_INFO, 4, 0, "Added input queue.\n"); - input_qs.push_back( new_q ); - } - - void addOutputQueue( SerranoCircularQueue* new_q ) { - output->verbose(CALL_INFO, 4, 0, "Added output queue.\n"); - output_qs.push_back( new_q ); - } - - virtual const char* getUnitTypeString() = 0; - - size_t countInputQueues() const { return input_qs.size(); } - size_t countOutputQueues() const { return output_qs.size(); } - - virtual void checkRequiredQueues( SST::Output* output ) = 0; - -protected: - SST::Output* output; - std::vector< SerranoCircularQueue* > input_qs; - std::vector< SerranoCircularQueue* > output_qs; - -}; - -} -} - -#endif diff --git a/src/sst/elements/serrano/seriterunit.h b/src/sst/elements/serrano/seriterunit.h deleted file mode 100644 index 43f516bb1d..0000000000 --- a/src/sst/elements/serrano/seriterunit.h +++ /dev/null @@ -1,176 +0,0 @@ -// Copyright 2009-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _H_SERRANO_ITERATOR_UNIT -#define _H_SERRANO_ITERATOR_UNIT - -#include "sercgunit.h" -#include - -namespace SST { -namespace Serrano { - -class SerranoIteratorUnit : public SerranoCoarseUnit { - -public: - SST_ELI_REGISTER_SUBCOMPONENT( - SST::Serrano::SerranoIteratorUnit, - "serrano", - "SerranoIteratorUnit", - SST_ELI_ELEMENT_VERSION(1, 0, 0), - "Performs iteration-like behavior", - SST::Serrano::SerranoCoarseUnit - ) - - SST_ELI_DOCUMENT_PARAMS( - { "start", "Value to start iterating at." }, - { "end", "Value to stop iterating at." }, - { "step", "Value to step the iteration with." }, - { "data_type", "Type of the iteration value" } - ) - - SST_ELI_DOCUMENT_STATISTICS() - - SerranoIteratorUnit( SST::ComponentId_t id, SST::Params& params ) : - SerranoCoarseUnit(id, params) { - - func = nullptr; - keep_processing = true; - - const int params_type = params.find("data_type", 1); - - output->verbose(CALL_INFO, 2, 0, "Creating iterator with data-type: %d\n", params_type ); - - switch(params_type) { - case 1: - d_type = TYPE_INT32; - configureIterations( params.find("start", 0), - params.find("step", 1), - params.find("end", std::numeric_limits::max() ) ); - func = std::bind( &SST::Serrano::SerranoIteratorUnit::execute_int32, this ); - break; - case 2: - d_type = TYPE_INT64; - configureIterations( params.find("start", 0), - params.find("step", 1), - params.find("end", std::numeric_limits::max() ) ); - func = std::bind( &SST::Serrano::SerranoIteratorUnit::execute_int64, this ); - break; - case 4: - d_type = TYPE_FP32; - configureIterations( params.find("start", 0 ), - params.find("step", 1.0 ), - params.find("end", std::numeric_limits::max() ) ); - func = std::bind( &SST::Serrano::SerranoIteratorUnit::execute_fp32, this ); - break; - case 8: - d_type = TYPE_FP64; - configureIterations( params.find("start", 0 ), - params.find("step", 1.0 ), - params.find("end", std::numeric_limits::max() ) ); - func = std::bind( &SST::Serrano::SerranoIteratorUnit::execute_fp64, this ); - break; - default: - output->fatal(CALL_INFO, -1, "Error: unknown data type to process.\n"); - break; - } - } - - ~SerranoIteratorUnit() { - - } - - virtual const char* getUnitTypeString() { - return "ITERATOR"; - } - - virtual bool stillProcessing() { - return keep_processing; - } - - virtual void checkRequiredQueues( SST::Output* output ) { - if( output_qs.size() == 0 ) { - output->fatal(CALL_INFO, -1, "Need an output queue for an iterator to work.\n"); - } - } - - virtual void execute( const uint64_t currentCycle ) { - output->verbose(CALL_INFO, 8, 0, "Executing iteration generator...\n"); - - if( nullptr != func ) { - func(); - } - } - -protected: - SerranoStandardType d_type; - std::function func; - void* current_value; - void* max_value; - void* step_value; - bool keep_processing; - - void execute_int32() { - executeStep(); - } - - void execute_int64() { - executeStep(); - } - - void execute_fp32() { - executeStep(); - } - - void execute_fp64() { - executeStep(); - } - - template void executeStep() { - T* t_current_value = (T*) current_value; - T* t_max_value = (T*) max_value; - T* t_step_value = (T*) step_value; - - if( (*t_current_value) < (*t_max_value) ) { - if( ! output_qs[0]->full() ) { - output_qs[0]->push( new SerranoMessage( sizeof(T), t_current_value ) ); - (*t_current_value) += (*t_step_value); - } - } else { - output->verbose(CALL_INFO, 16, 0, "Hit the upper limit of the iteration value, processing is complete for iterator.\n"); - keep_processing = false; - } - } - - template void configureIterations( const T start, const T step, const T end ) { - current_value = (void*) ( new T[1] ); - max_value = (void*) ( new T[1] ); - step_value = (void*) ( new T[1] ); - - T* t_current_value = (T*) current_value; - T* t_max_value = (T*) max_value; - T* t_step_value = (T*) step_value; - - (*t_current_value ) = start; - (*t_max_value ) = end; - (*t_step_value ) = step; - } - -}; - -} -} - -#endif diff --git a/src/sst/elements/serrano/serprintunit.h b/src/sst/elements/serrano/serprintunit.h deleted file mode 100644 index 9bbb5186b7..0000000000 --- a/src/sst/elements/serrano/serprintunit.h +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright 2009-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _H_SERRANO_PRINT_UNIT -#define _H_SERRANO_PRINT_UNIT - -#include -#include "sercgunit.h" - -namespace SST { -namespace Serrano { - -class SerranoPrinterUnit : public SerranoCoarseUnit { -public: - SST_ELI_REGISTER_SUBCOMPONENT( - SST::Serrano::SerranoPrinterUnit, - "serrano", - "SerranoPrinterUnit", - SST_ELI_ELEMENT_VERSION(1, 0, 0), - "Performs printing of a value", - SST::Serrano::SerranoCoarseUnit - ) - - SST_ELI_DOCUMENT_PARAMS() - - SST_ELI_DOCUMENT_STATISTICS() - - SerranoPrinterUnit( SST::ComponentId_t id, SST::Params& params ) : - SerranoCoarseUnit(id, params) { - - const int param_d_type = params.find("data_type", 0); - - switch( param_d_type ) { - case 1: d_type = TYPE_INT32; break; - case 2: d_type = TYPE_INT64; break; - case 4: d_type = TYPE_FP32; break; - case 8: d_type = TYPE_FP64; break; - } - - } - - virtual bool stillProcessing() { - return false; - } - - virtual void execute( const uint64_t current_cycle ) { - print(); - } - - virtual void checkRequiredQueues( SST::Output* output ) { - if( 0 == input_qs.size() ) { - output->fatal(CALL_INFO, -1, "Error - not enough input queues for a printer unit.\n"); - } - } - - virtual const char* getUnitTypeString() { - return "PRINTER"; - } - -protected: - SerranoStandardType d_type; - - void print() { - if(! input_qs[0]->empty() ) { - SerranoMessage* msg = input_qs[0]->pop(); - - switch(d_type) { - case TYPE_INT32: - output->verbose(CALL_INFO, 0, 0, "%" PRId32 "\n", extractValue(output, msg) ); break; - case TYPE_INT64: - output->verbose(CALL_INFO, 0, 0, "%" PRId64 "\n", extractValue(output, msg) ); break; - case TYPE_FP32: - output->verbose(CALL_INFO, 0, 0, "%f\n", extractValue(output, msg) ); break; - case TYPE_FP64: - output->verbose(CALL_INFO, 0, 0, "%f\n", extractValue(output, msg) ); break; - default: - output->fatal(CALL_INFO, -1, "Unknown data type.\n"); - break; - } - - delete msg; - } - } - -}; - -} -} - -#endif diff --git a/src/sst/elements/serrano/serrano.cc b/src/sst/elements/serrano/serrano.cc deleted file mode 100644 index 901ed2f79c..0000000000 --- a/src/sst/elements/serrano/serrano.cc +++ /dev/null @@ -1,270 +0,0 @@ -// Copyright 2009-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#include "serrano.h" - -#include "sercgunit.h" -#include "serstdunit.h" -#include "seriterunit.h" -#include "serprintunit.h" - -#include - -using namespace SST::Serrano; - -SerranoComponent::SerranoComponent( SST::ComponentId_t id, SST::Params& params ) : - Component(id) { - - char* comp_prefix = new char[128]; - snprintf(comp_prefix, 128, "[Ser: %5d] ", (int) id); - - const int verbosity = params.find("verbose", 0); - output = new SST::Output( comp_prefix, verbosity, 0, Output::STDOUT ); - delete[] comp_prefix; - - const std::string clock = params.find("clock", "1GHz"); - output->verbose(CALL_INFO, 2, 0, "Configuring Serrano for clock of %s...\n", clock.c_str()); - registerClock( clock, new Clock::Handler( this, &SerranoComponent::tick ) ); - - constexpr int kernel_name_len = 128; - char* kernel_name = new char[kernel_name_len]; - for( int i = 0; i < std::numeric_limits::max(); ++i ) { - snprintf( kernel_name, kernel_name_len, "kernel%d", i); - std::string kernel_name_file = params.find( kernel_name, "" ); - - if( "" != kernel_name_file) { - output->verbose(CALL_INFO, 4, 0, "Found Kernel (%s): %s\n", kernel_name, kernel_name_file.c_str()); - kernel_queue.push_back( kernel_name_file ); - } else { - break; - } - } - delete[] kernel_name; - - if( kernel_queue.size() > 0 ) { - constructGraph( output, kernel_queue.front().c_str() ); - kernel_queue.pop_front(); - } - - registerAsPrimaryComponent(); - primaryComponentDoNotEndSim(); -} - -SerranoComponent::~SerranoComponent() { - delete output; -} - -bool SerranoComponent::tick( SST::Cycle_t currentCycle ) { - - output->verbose(CALL_INFO, 4, 0, "Clocking Serrano cycle %" PRIu64 "...\n", currentCycle ); - - // Tick all units - for( auto next_unit : units ) { - next_unit.second->execute( currentCycle ); - } - - bool units_continue = false; - bool queues_continue = false; - - // Do we have any units which want to continue processing - for( auto next_unit : units ) { - output->verbose(CALL_INFO, 16, 0, "Unit-ID: %" PRIu64 " status: %s\n", next_unit.first, - ( next_unit.second->stillProcessing() ? "keep-processing" : "completed" ) ); - units_continue |= next_unit.second->stillProcessing(); - } - - // Check that any queue is not empty - for( auto next_q : msg_queues ) { - queues_continue |= ( ! next_q.second->empty() ); - } - - if( units_continue ) { - output->verbose(CALL_INFO, 4, 0, "Work units are still processing, continue for another cycle\n"); - return false; - } else { - if( queues_continue ) { - output->verbose(CALL_INFO, 4, 0, "Queues contain entries that may need processing, continue for another cycle.\n"); - return false; - } else { - output->verbose(CALL_INFO, 4, 0, "Neither queues or units have no work, no need to continue processing.\n"); - primaryComponentOKToEndSim(); - return true; - } - } - -} - -void SerranoComponent::constructGraph( SST::Output* output, const char* kernel_file ) { - output->verbose(CALL_INFO, 4, 0, "Parsing kernel at: %s...\n", kernel_file); - FILE* graph_file = fopen( kernel_file, "rt" ); - - if( nullptr == graph_file ) { - output->fatal(CALL_INFO, -1, "Error: unable to open file: %s\n", kernel_file ); - } - - constexpr int buff_max = 1024; - char* line = new char[buff_max]; - int index = 0; - - Params empty_params; - - while( ! feof( graph_file ) ) { - read_line( graph_file, line, buff_max); - printf("Line[%s]\n", line); - - if( ( 0 == strcmp( line, "" ) ) || ( line[0] == '#') ) { - continue; - } - - char* token = strtok( line, " " ); - char* item_id = strtok( nullptr, " " ); - const uint64_t id = std::atoll( item_id ); - - if( 0 == strcmp( token, "NODE" ) ) { - char* unit_type = strtok( nullptr, " " ); - char* unit_data_type = strtok( nullptr, " " ); - - int iterator_type = 0; - SerranoStandardType node_op_type; - - if( 0 == strcmp( unit_data_type, "INT32" ) ) { - node_op_type = TYPE_INT32; - iterator_type = 1; - } else if( 0 == strcmp( unit_data_type, "INT64" ) ) { - node_op_type = TYPE_INT64; - iterator_type = 2; - } else if( 0 == strcmp( unit_data_type, "FP32" ) ) { - node_op_type = TYPE_FP32; - iterator_type = 4; - } else if( 0 == strcmp( unit_data_type, "FP64" ) ) { - node_op_type = TYPE_FP64; - iterator_type = 8; - } - - Params unit_params; - - char* verbose_param = new char[16]; - snprintf( verbose_param, 16, "%d", output->getVerboseLevel() ); - unit_params.insert( "verbose", verbose_param ); - delete[] verbose_param; - - char* param_name = strtok( nullptr, " " ); - while( nullptr != param_name ) { - char* value = strtok( nullptr, " " ); - output->verbose(CALL_INFO, 4, 0, "param: %s=%s\n", param_name, value); - unit_params.insert( param_name, value ); - param_name = strtok( nullptr, " "); - } - - SerranoCoarseUnit* new_unit = nullptr; - - output->verbose(CALL_INFO, 4, 0, "Creating a new coarse unit type: %s\n", unit_type); - - char* dtype_str = new char[16]; - snprintf( dtype_str, 16, "%d", iterator_type ); - unit_params.insert( "data_type", dtype_str ); - delete[] dtype_str; - - if( 0 == strcmp( unit_type, "ITERATOR" ) ) { - new_unit = loadAnonymousSubComponent( "serrano.SerranoIteratorUnit", "slot", 0, ComponentInfo::SHARE_NONE, unit_params ); - } else if( 0 == strcmp( unit_type, "ADD" ) ) { - new_unit = loadAnonymousSubComponent( "serrano.SerranoBasicUnit", "slot", 0, ComponentInfo::SHARE_NONE, unit_params ); - SerranoBasicUnit* new_unit_basic = (SerranoBasicUnit*) new_unit; - new_unit_basic->configureFunction( output, OP_ADD, node_op_type ); - } else if( 0 == strcmp( unit_type, "SUB" ) ) { - new_unit = loadAnonymousSubComponent( "serrano.SerranoBasicUnit", "slot", 0, ComponentInfo::SHARE_NONE, unit_params ); - SerranoBasicUnit* new_unit_basic = (SerranoBasicUnit*) new_unit; - new_unit_basic->configureFunction( output, OP_SUB, node_op_type ); - } else if( 0 == strcmp( unit_type, "PRINTER" ) ) { - new_unit = loadAnonymousSubComponent( "serrano.SerranoPrinterUnit", "slot", 0, ComponentInfo::SHARE_NONE, unit_params ); - } else { - output->fatal(CALL_INFO, -1, "Error: unable to parse node type (%s)\n", token ); - } - - units.insert( std::pair< uint64_t, SerranoCoarseUnit* >( id, new_unit ) ); - } else if( 0 == strcmp( token, "LINK" ) ) { - char* in_unit = strtok( nullptr, " " ); - char* out_unit = strtok( nullptr, " " ); - - const uint64_t u64_in_unit = std::atoll( in_unit ); - const uint64_t u64_out_unit = std::atoll( out_unit ); - - SerranoCircularQueue* new_q = new SerranoCircularQueue(2); - - if( ( units.find( u64_in_unit ) != units.end() ) && ( units.find( u64_out_unit ) != units.end() ) ) { - output->verbose(CALL_INFO, 4, 0, "Connecting %" PRIu64 " -> %" PRIu64 " (link-id: %" PRIu64 ")\n", - u64_in_unit, u64_out_unit, id); - // These are swapped, input to the link is the output of a unit and vice versa - units[ u64_in_unit ]->addOutputQueue( new_q ); - units[ u64_out_unit ]->addInputQueue( new_q ); - } else { - output->fatal(CALL_INFO, -1, "Error: link does not connect an existing input or output component.\n"); - } - } else { - - } - } - - delete[] line; - fclose( graph_file ); - - /* cycle over and check queues are good, these will fatal */ - for( auto next_unit : units ) { - next_unit.second->checkRequiredQueues( output ); - } -} - -int SerranoComponent::read_line( FILE* file_h, char* buffer, const size_t buffer_max ) { - int status = 0; - int index = 0; - bool keep_looping = true; - - while( keep_looping ) { - char nxt_c = (char) fgetc( file_h ); - - switch( nxt_c ) { - case EOF: - keep_looping = false; - break; - case '\n': - keep_looping = false; - break; - default: - buffer[index++] = nxt_c; - break; - } - } - - buffer[index] = '\0'; - return status; -} - -void SerranoComponent::clearGraph() { - output->verbose(CALL_INFO, 2, 0, "Clearing current graph...\n"); - - for( auto next_q : msg_queues ) { - delete next_q.second;; - } - - msg_queues.clear(); - - for( auto next_unit : units ) { - delete next_unit.second; - } - - units.clear(); - - output->verbose(CALL_INFO, 2, 0, "Graph clear done. Reset is complete\n"); -} diff --git a/src/sst/elements/serrano/serrano.h b/src/sst/elements/serrano/serrano.h deleted file mode 100644 index d94b069847..0000000000 --- a/src/sst/elements/serrano/serrano.h +++ /dev/null @@ -1,75 +0,0 @@ -// Copyright 2009-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _H_SST_SERRANO -#define _H_SST_SERRANO - -#include -#include -#include - -#include - -#include "smsg.h" -#include "scircq.h" -#include "sercgunit.h" - - -namespace SST { -namespace Serrano { - -class SerranoComponent : public SST::Component { - -public: - SerranoComponent( SST::ComponentId_t id, SST::Params& params ); - ~SerranoComponent(); - - bool tick( SST::Cycle_t currentCycle ); - - SST_ELI_REGISTER_COMPONENT( - SerranoComponent, - "serrano", - "Serrano", - SST_ELI_ELEMENT_VERSION( 1, 0, 0 ), - "High-Level CGRA Simulation Model", - COMPONENT_CATEGORY_PROCESSOR - ) - - SST_ELI_DOCUMENT_PARAMS( - - ) - - SST_ELI_DOCUMENT_STATISTICS( - - ) - - void clearGraph(); - void constructGraph( SST::Output* output, const char* kernel_file ); - -private: - int read_line( FILE* file_h, char* buffer, const size_t buffer_max ); - - SST::Output* output; - std::list< std::string > kernel_queue; - std::map< uint64_t, SerranoCoarseUnit* > units; - std::map< uint64_t, SerranoCircularQueue* > msg_queues; - - -}; - -} -} - -#endif diff --git a/src/sst/elements/serrano/serstdunit.h b/src/sst/elements/serrano/serstdunit.h deleted file mode 100644 index 861da10159..0000000000 --- a/src/sst/elements/serrano/serstdunit.h +++ /dev/null @@ -1,193 +0,0 @@ -// Copyright 2009-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _H_SERRANO_BINARY_OP_CG_UNIT -#define _H_SERRANO_BINARY_OP_CG_UNIT - -#include - -#include "smsg.h" -#include "sercgunit.h" -#include "scircq.h" - -namespace SST { -namespace Serrano { - -enum SerranoStandardOp { - OP_ADD, - OP_SUB, - OP_DIV, - OP_MUL, - OP_MOD, - OP_MSG_DUPLICATE, - OP_MSG_INTERLEAVE, - OP_CUSTOM -}; - -class SerranoBasicUnit : public SerranoCoarseUnit { - -public: - SST_ELI_REGISTER_SUBCOMPONENT( - SST::Serrano::SerranoBasicUnit, - "serrano", - "SerranoBasicUnit", - SST_ELI_ELEMENT_VERSION(1, 0, 0), - "Basic coarse-grained functional unit for simple operations", - SST::Serrano::SerranoCoarseUnit - ) - - SST_ELI_DOCUMENT_PARAMS() - SST_ELI_DOCUMENT_STATISTICS() - - SerranoBasicUnit( SST::ComponentId_t id, Params& params ) : - SerranoCoarseUnit(id, params) { - - required_in_qs = 0; - required_out_qs = 0; - } - - ~SerranoBasicUnit() { - msgs_in.clear(); - } - - void configureFunction( SST::Output* output, SerranoStandardOp op, SerranoStandardType dt ) { - switch( op ) { - case OP_ADD: - switch( dt ) { - case TYPE_INT32: unit_func = std::bind( &SST::Serrano::SerranoBasicUnit::execute_i32_add, this, std::placeholders::_1, std::placeholders::_2 ); break; - case TYPE_INT64: unit_func = std::bind( &SST::Serrano::SerranoBasicUnit::execute_i64_add, this, std::placeholders::_1, std::placeholders::_2 ); break; - case TYPE_FP32: unit_func = std::bind( &SST::Serrano::SerranoBasicUnit::execute_f32_add, this, std::placeholders::_1, std::placeholders::_2 ); break; - case TYPE_FP64: unit_func = std::bind( &SST::Serrano::SerranoBasicUnit::execute_f64_add, this, std::placeholders::_1, std::placeholders::_2 ); break; - default: - output->fatal(CALL_INFO, -1, "Unknown data type supplied to an add operation.\n"); - break; - } - - required_in_qs = 2; - required_out_qs = 1; - - break; - default: - output->verbose(CALL_INFO, 2, 0, "Function was not decoded, and so will not be set. This will likely cause a fatal later in execution.\n"); - } - } - - void checkRequiredQueues( SST::Output* output ) { - if( ( required_in_qs >= input_qs.size() ) && - ( required_out_qs >= output_qs.size() ) ) { - - } else { - output->fatal(CALL_INFO, -1, "Error: required queues were not matched. in (req/av): %d/%d, out (req/av): %d/%d\n", - (int) required_in_qs, (int) input_qs.size(), (int) required_out_qs, (int) output_qs.size() ); - } - } - - virtual const char* getUnitTypeString() { - return "STD-UNIT"; - } - - virtual bool stillProcessing() { return false; } - - virtual void execute( const uint64_t current_cycle ) { - if( nullptr == unit_func ) { - output->fatal(CALL_INFO, -1, "Error: function to execute has not been defined or was not decoded correctly.\n"); - } - - bool all_ins_ready = true; - bool out_ready = (! output_qs[0]->full()); - - for( SerranoCircularQueue* in_q : input_qs ) { - all_ins_ready &= (!in_q->empty()); - } - - if( all_ins_ready & out_ready ) { - // We are good to go, all inputs have a message, output has a slot - for( SerranoCircularQueue* in_q : input_qs ) { - msgs_in.push_back( in_q->pop() ); - } - - // Execute the function - unit_func( output, msgs_in ); - - // Delete the messages from the incoming queues to free memory - for( SerranoMessage* in_msg : msgs_in ) { - delete in_msg; - } - - // Clear the vector this cycle - msgs_in.clear(); - } else { - output->verbose(CALL_INFO, 8, 0, "Unable to execute this cycle due to queue-check failing: in-q: %s / out-q: %s\n", - (all_ins_ready) ? "ready" : "not-ready", (out_ready) ? "ready" : "not-ready" ); - } - } - -protected: - template void execute_add( std::vector& msg_in, const T init_value ) { - T result = init_value; - - for( SerranoMessage* msg : msg_in ) { - result += extractValue( output, msg ); - } - - output_qs[0]->push( constructMessage( result ) ); - } - - template void execute_sub( std::vector& msg_in, const T init_value ) { - T result = init_value; - - for( SerranoMessage* msg : msg_in ) { - result -= extractValue( output, msg ); - } - - output_qs[0]->push( constructMessage( result ) ); - } - - void execute_i32_add( SST::Output* output, std::vector& msg_in ) { - execute_add(msg_in, 0); - } - - void execute_u32_add( SST::Output* output, std::vector& msg_in ) { - execute_add(msg_in, 0); - } - - void execute_i64_add( SST::Output* output, std::vector& msg_in ) { - execute_add(msg_in, 0); - } - - void execute_u64_add( SST::Output* output, std::vector& msg_in ) { - execute_add(msg_in, 0); - } - - void execute_f32_add( SST::Output* output, std::vector& msg_in ) { - execute_add(msg_in, 0.0); - } - - void execute_f64_add( SST::Output* output, std::vector& msg_in ) { - execute_add(msg_in, 0.0); - } - - std::vector msgs_in; - std::function< void( SST::Output*, std::vector& )> unit_func; - - size_t required_in_qs; - size_t required_out_qs; - -}; - -} -} - -#endif diff --git a/src/sst/elements/serrano/smsg.h b/src/sst/elements/serrano/smsg.h deleted file mode 100644 index 2afdf8b6a1..0000000000 --- a/src/sst/elements/serrano/smsg.h +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright 2009-2024 NTESS. Under the terms -// of Contract DE-NA0003525 with NTESS, the U.S. -// Government retains certain rights in this software. -// -// Copyright (c) 2009-2024, NTESS -// All rights reserved. -// -// Portions are copyright of other developers: -// See the file CONTRIBUTORS.TXT in the top level directory -// of the distribution for more information. -// -// This file is part of the SST software package. For license -// information, see the LICENSE file in the top level directory of the -// distribution. - -#ifndef _H_SERRANO_MESSAGE -#define _H_SERRANO_MESSAGE - -#include -#include - -namespace SST { -namespace Serrano { - -class SerranoMessage { - -public: - SerranoMessage( const size_t size ) : msg_size(size) { - payload = new uint8_t[ msg_size ]; - } - - SerranoMessage( const size_t size, void* ptr ) : msg_size(size) { - payload = new uint8_t[ msg_size ]; - - uint8_t* ptr_u = (uint8_t*) ptr; - - for( size_t i = 0; i < msg_size; ++i ) { - payload[i] = ptr_u[i]; - } - } - - ~SerranoMessage() { - delete[] payload; - } - - size_t getSize() const { return msg_size; } - uint8_t* getPayload() { return payload; } - - void setPayload( const uint8_t* new_data ) { - for( size_t i = 0; i < msg_size; ++i ) { - payload[i] = new_data[i]; - } - } - - void setPayload( const uint8_t* new_data, const size_t new_size ) { - for( size_t i = 0; i < std::min( new_size, msg_size); ++i ) { - payload[i] = new_data[i]; - } - } - -protected: - const size_t msg_size; - uint8_t* payload; - -}; - -template SerranoMessage* constructMessage( T value ) { - SerranoMessage* new_msg = new SerranoMessage( sizeof(T) ); - new_msg->setPayload( (uint8_t*) &value ); - - return new_msg; -}; - -template T extractValue( SST::Output* output, SerranoMessage* msg ) { - if( sizeof(T) == msg->getSize() ) { - return *( (T*) msg->getPayload() ); - } else { - output->fatal(CALL_INFO, -1, "Error: tried to construct a value needing %d bytes from a message with %d bytes in payload.\n", - (int) sizeof(T), (int) msg->getSize()); - - return T(); - } -}; - -} -} - -#endif diff --git a/src/sst/elements/serrano/tests/graphs/sum.graph b/src/sst/elements/serrano/tests/graphs/sum.graph deleted file mode 100644 index 5a64f3c5b4..0000000000 --- a/src/sst/elements/serrano/tests/graphs/sum.graph +++ /dev/null @@ -1,9 +0,0 @@ - -NODE 0 ITERATOR INT32 start 0 step 1 end 100 -NODE 1 ITERATOR INT32 start 100 step 1 end 200 -NODE 2 ADD INT32 -NODE 3 PRINTER INT32 - -LINK 0 0 2 -LINK 1 1 2 -LINK 2 2 3 diff --git a/src/sst/elements/serrano/tests/test_serrano.py b/src/sst/elements/serrano/tests/test_serrano.py deleted file mode 100644 index eaf1fec5b0..0000000000 --- a/src/sst/elements/serrano/tests/test_serrano.py +++ /dev/null @@ -1,12 +0,0 @@ - -import os -import sst - -# Define SST core options -sst.setProgramOption("timebase", "1ps") - -serr_comp = sst.Component("serrano", "serrano.Serrano") -serr_comp.addParams({ - "verbose" : 26, - "kernel0" : "test/graphs/sum.graph" - })