diff --git a/CMakeLists.txt b/CMakeLists.txt index c34f7cc9..144bc965 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -145,10 +145,12 @@ install ( TARGETS ${ROCPROFILER_TARGET} LIBRARY DESTINATION ${DEST_NAME}/lib ) install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler.h ${CMAKE_CURRENT_SOURCE_DIR}/src/core/activity.h + ${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler_trace_entries.h DESTINATION ${DEST_NAME}/include ) install ( FILES ${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler.h ${CMAKE_CURRENT_SOURCE_DIR}/src/core/activity.h + ${CMAKE_CURRENT_SOURCE_DIR}/inc/rocprofiler_trace_entries.h DESTINATION include/${DEST_NAME} ) # rpl_run.sh tblextr.py txt2xml.sh install ( FILES diff --git a/bin/rpl_run.sh b/bin/rpl_run.sh old mode 100755 new mode 100644 index 8fd2e681..b7e6b32a --- a/bin/rpl_run.sh +++ b/bin/rpl_run.sh @@ -50,6 +50,7 @@ KFD_TRACE=0 HSA_TRACE=0 SYS_TRACE=0 HIP_TRACE=0 +OUTPUT_PLUGIN=0 # Generate stats GEN_STATS=0 @@ -185,6 +186,7 @@ usage() { echo " " echo " " echo "" + echo " --output-plugin - to enable the use of a plugin" echo " --trace-start - to enable tracing on start [on]" echo " --trace-period - to enable trace with initial delay, with periodic sample length and rate" echo " Supported time formats: " @@ -263,6 +265,21 @@ run() { OUTPUT_LIST="$OUTPUT_LIST $ROCP_OUTPUT_DIR/results.txt" fi + if [ $OUTPUT_PLUGIN = 1 ] ; then + if [ ! -e "$PLUGIN_PATH" ] ; then + error "'$PLUGIN_PATH' directory does not exist" + fi + if [ ! -f "$PLUGIN_PATH/rocprofiler_plugin_lib.so" ] ; then + error "Could not find rocprofiler_plugin_lib.so library at '$PLUGIN_PATH'" + fi + if [ ! -e "$PLUGIN_PATH/roctracer_plugin_lib.so" ] ; then + error "Could not find roctracer_plugin_lib.so library at '$PLUGIN_PATH'" + fi + export PLUGIN_LIB="enabled" + export PLUGIN_PATH + export ROCPROFILER_PLUGIN_LIB="$PLUGIN_PATH/rocprofiler_plugin_lib.so" + export ROCTRACER_PLUGIN_LIB="$PLUGIN_PATH/roctracer_plugin_lib.so" + fi API_TRACE="" MY_LD_PRELOAD="" if [ "$ROCTX_TRACE" = 1 ] ; then @@ -436,6 +453,9 @@ while [ 1 ] ; do export ROCP_TIMESTAMP_ON=1 GEN_STATS=1 HIP_TRACE=1 + elif [ "$1" = "--output-plugin" ] ; then + OUTPUT_PLUGIN=1 + PLUGIN_PATH="$2" elif [ "$1" = "--trace-start" ] ; then if [ "$2" = "off" ] ; then export ROCP_CTRL_RATE="-1" diff --git a/inc/rocprofiler_trace_entries.h b/inc/rocprofiler_trace_entries.h new file mode 100644 index 00000000..6611b615 --- /dev/null +++ b/inc/rocprofiler_trace_entries.h @@ -0,0 +1,36 @@ +#ifndef INC_ROCTRACER_TRACE_ENTRIES_H_ +#define INC_ROCTRACER_TRACE_ENTRIES_H_ + +#include + +struct metric_trace_entry_t { + uint32_t dispatch; + const char* name; + uint64_t result; +}; + +struct kernel_trace_entry_t { + uint32_t dispatch; + uint32_t gpu_id; + uint32_t queue_id; + uint64_t queue_index; + uint32_t pid; + uint32_t tid; + uint32_t grid_size; + uint32_t workgroup_size; + uint32_t lds_size; + uint32_t scratch_size; + uint32_t vgpr; + uint32_t sgpr; + uint32_t fbarrier_count; + uint64_t signal_handle; + uint64_t object; + const char* kernel_name; + bool record; + uint64_t dispatch_time; + uint64_t begin; + uint64_t end; + uint64_t complete; +}; + +#endif \ No newline at end of file diff --git a/test/tool/tool.cpp b/test/tool/tool.cpp index 4bdce5dd..c3be6293 100644 --- a/test/tool/tool.cpp +++ b/test/tool/tool.cpp @@ -37,6 +37,7 @@ THE SOFTWARE. #include /* For SYS_xxx definitions */ #include #include +#include #include #include @@ -49,6 +50,7 @@ THE SOFTWARE. #include #include "inc/rocprofiler.h" +#include "inc/rocprofiler_trace_entries.h" #include "util/hsa_rsrc_factory.h" #include "util/xml.h" @@ -152,6 +154,12 @@ static inline uint32_t GetTid() { return syscall(__NR_gettid); } uint32_t my_pid = GetPid(); +//Plugins objects +void* dl_handle; +void (*init_plugin_lib)(const char*, std::vector); +void (*close_plugin_lib)(); +bool output_plugin_enabled = false; + // Error handler void fatal(const std::string msg) { fflush(stdout); @@ -337,19 +345,25 @@ unsigned align_size(unsigned size, unsigned alignment) { return ((size + alignment - 1) & ~(alignment - 1)); } +void (*metric_flush_cb_ptr)(metric_trace_entry_t *entry); +void metric_flush_cb(metric_trace_entry_t *entry){ + fprintf(result_file_handle, " %s ", entry->name); + fprintf(result_file_handle, "(%lu)\n", entry->result); +} + // Output profiling results for input features void output_results(const context_entry_t* entry, const char* label) { - FILE* file = entry->file_handle; const rocprofiler_feature_t* features = entry->features; const unsigned feature_count = entry->feature_count; for (unsigned i = 0; i < feature_count; ++i) { const rocprofiler_feature_t* p = &features[i]; - fprintf(file, " %s ", p->name); switch (p->data.kind) { // Output metrics results - case ROCPROFILER_DATA_KIND_INT64: - fprintf(file, "(%lu)\n", p->data.result_int64); + case ROCPROFILER_DATA_KIND_INT64: { + metric_trace_entry_t metric_trace_entry = {entry->index, p->name, p->data.result_int64}; + metric_flush_cb_ptr(&metric_trace_entry); + } break; default: fprintf(stderr, "RPL-tool: undefined data kind(%u)\n", p->data.kind); @@ -371,6 +385,34 @@ void output_group(const context_entry_t* entry, const char* label) { } } +void (*kernel_flush_cb_ptr)(kernel_trace_entry_t *entry); +void kernel_flush_cb(kernel_trace_entry_t* entry){ + fprintf(result_file_handle, "dispatch[%u], gpu-id(%u), queue-id(%u), queue-index(%lu), pid(%u), tid(%u), grd(%u), wgr(%u), lds(%u), scr(%u), vgpr(%u), sgpr(%u), fbar(%u), sig(0x%lx), obj(0x%lx), kernel-name(\"%s\")", + entry->dispatch, + entry->gpu_id, + entry->queue_id, + entry->queue_index, + entry->pid, + entry->tid, + entry->grid_size, + entry->workgroup_size, + entry->lds_size, + entry->scratch_size, + entry->vgpr, + entry->sgpr, + entry->fbarrier_count, + entry->signal_handle, + entry->object, + entry->kernel_name); + if (entry->record) fprintf(result_file_handle, ", time(%lu,%lu,%lu,%lu)", + entry->dispatch_time, + entry->begin, + entry->end, + entry->complete); + fprintf(result_file_handle, "\n"); + fflush(result_file_handle); +} + // Dump stored context entry bool dump_context_entry(context_entry_t* entry, bool to_clean = true) { hsa_status_t status = HSA_STATUS_ERROR; @@ -389,15 +431,12 @@ bool dump_context_entry(context_entry_t* entry, bool to_clean = true) { const uint32_t index = entry->index; if (index != UINT32_MAX) { - FILE* file_handle = entry->file_handle; const std::string nik_name = (to_truncate_names == 0) ? entry->data.kernel_name : filtr_kernel_name(entry->data.kernel_name); const AgentInfo* agent_info = HsaRsrcFactory::Instance().GetAgentInfo(entry->agent); - - fprintf(file_handle, "dispatch[%u], gpu-id(%u), queue-id(%u), queue-index(%lu), pid(%u), tid(%u), grd(%u), wgr(%u), lds(%u), scr(%u), vgpr(%u), sgpr(%u), fbar(%u), sig(0x%lx), obj(0x%lx), kernel-name(\"%s\")", - index, + kernel_trace_entry_t kernel_trace_entry = {entry->index, agent_info->dev_index, entry->data.queue_id, - entry->data.queue_index, + entry->data.queue_index, my_pid, entry->data.thread_id, entry->kernel_properties.grid_size, @@ -409,14 +448,13 @@ bool dump_context_entry(context_entry_t* entry, bool to_clean = true) { entry->kernel_properties.fbarrier_count, entry->kernel_properties.signal.handle, entry->kernel_properties.object, - nik_name.c_str()); - if (record) fprintf(file_handle, ", time(%lu,%lu,%lu,%lu)", - record->dispatch, - record->begin, - record->end, - record->complete); - fprintf(file_handle, "\n"); - fflush(file_handle); + nik_name.c_str(), + record != NULL ? true : false, + record != NULL ? record->dispatch: 0, + record != NULL ? record->begin : 0, + record != NULL ? record->end : 0, + record != NULL ? record->complete : 0}; + kernel_flush_cb_ptr(&kernel_trace_entry); } if (record && to_clean) { delete record; @@ -957,6 +995,52 @@ extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings) rcpath = std::string(pkg_dir) + "/" + rcfile_name; rcfile = xml::Xml::Create(rcpath); } + + //Load output plugin if enabled + const char* plugin_lib = getenv("PLUGIN_LIB"); + if(plugin_lib != NULL){ + if (std::string(plugin_lib).find("enabled") != std::string::npos) { + output_plugin_enabled = true; + const char* rocprofiler_plugin_lib = getenv("ROCPROFILER_PLUGIN_LIB"); + if(rocprofiler_plugin_lib){ + dl_handle = dlopen(rocprofiler_plugin_lib, RTLD_LAZY); + if (!dl_handle) { + printf("error: %s\n", dlerror()); + abort(); + } + + init_plugin_lib = (void (*)(const char* prefix, std::vector metrics_vector))dlsym(dl_handle, "init_plugin_lib"); + if (!init_plugin_lib) { + printf("error: %s\n", dlerror()); + abort(); + } + + close_plugin_lib = (void (*)())dlsym(dl_handle, "close_plugin_lib"); + if (!close_plugin_lib) { + printf("error: %s\n", dlerror()); + abort(); + } + } + + metric_flush_cb_ptr = (void (*)(metric_trace_entry_t *entry))dlsym(dl_handle, "metric_flush_cb"); + if (!metric_flush_cb_ptr) { + printf("error: %s\n", dlerror()); + abort(); + } + + kernel_flush_cb_ptr = (void (*)(kernel_trace_entry_t *entry))dlsym(dl_handle, "kernel_flush_cb"); + if (!kernel_flush_cb_ptr) { + printf("error: %s\n", dlerror()); + abort(); + } + + } + } + + if(!output_plugin_enabled){ + metric_flush_cb_ptr = metric_flush_cb; + kernel_flush_cb_ptr = kernel_flush_cb; + } if (rcfile != NULL) { // Getting defaults printf("ROCProfiler pid(%u): rc-file '%s'\n", GetPid(), rcpath.c_str()); @@ -1046,14 +1130,16 @@ extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings) perror(errmsg.str().c_str()); abort(); } - std::ostringstream oss; - oss << result_prefix << "/" << GetPid() << "_results.txt"; - result_file_handle = fopen(oss.str().c_str(), "w"); - if (result_file_handle == NULL) { - std::ostringstream errmsg; - errmsg << "ROCProfiler: fopen error, file '" << oss.str().c_str() << "'"; - perror(errmsg.str().c_str()); - abort(); + if(!output_plugin_enabled){ + std::ostringstream oss; + oss << result_prefix << "/" << GetPid() << "_results.txt"; + result_file_handle = fopen(oss.str().c_str(), "w"); + if (result_file_handle == NULL) { + std::ostringstream errmsg; + errmsg << "ROCProfiler: fopen error, file '" << oss.str().c_str() << "'"; + perror(errmsg.str().c_str()); + abort(); + } } } else result_file_handle = stdout; @@ -1085,6 +1171,11 @@ extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings) } } + //Initialize the plugin if needed once the metrics have been registered + if(output_plugin_enabled){ + init_plugin_lib(result_prefix, metrics_vec); + } + // Getting GPU indexes gpu_index_vec = new std::vector; get_xml_array(xml, "top.metric", "gpu_index", ",", gpu_index_vec, " "); @@ -1108,7 +1199,6 @@ extern "C" PUBLIC_API void OnLoadToolProp(rocprofiler_settings_t* settings) // Getting traces const auto traces_list = xml->GetNodes("top.trace"); - if (traces_list.size() > 1) fatal("ROCProfiler: only one trace supported at a time"); const unsigned feature_count = metrics_vec.size() + traces_list.size(); rocprofiler_feature_t* features = new rocprofiler_feature_t[feature_count]; @@ -1266,6 +1356,12 @@ void rocprofiler_unload(bool is_destr) { delete context_array; context_array = NULL; + //Close the plugin and unload the library if plugin was used + if(output_plugin_enabled){ + close_plugin_lib(); + dlclose(dl_handle); + } + ONLOAD_TRACE_END(); }