diff --git a/book-user-code/LICENSE b/book-user-code/LICENSE new file mode 100644 index 000000000000..fec78b5cb236 --- /dev/null +++ b/book-user-code/LICENSE @@ -0,0 +1,13 @@ +The 3-Clause BSD License + +Copyright + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/book-user-code/README b/book-user-code/README new file mode 100644 index 000000000000..13411f844e06 --- /dev/null +++ b/book-user-code/README @@ -0,0 +1,15 @@ +This directory contains all the source files used in the book. +The compile_all_user.sh script compiles everything. 
+ +The kutrace_control.cc program is used to create traces, while other programs +are used to postprocess those traces into HTML display files. The postproc3.sh +script takes a tracefile stem (no ".trace") and produces the corresponding JSON +and HTML files. + +Yet other programs are part of the book content or are used in the exercises. + +All the code is open sourced under the BSD three-clause license. + + + + diff --git a/book-user-code/base40.cc b/book-user-code/base40.cc new file mode 100644 index 000000000000..c3f0dde24727 --- /dev/null +++ b/book-user-code/base40.cc @@ -0,0 +1,112 @@ +// Little program to turn input strings into base40 values +// Copyright 2021 Richard L. Sites +// +// compile with g++ -O2 base40.cc -o base40 +// + +// Example output: +#define BASE40_a 1 // "a" +#define BASE40__a 79 // "/a" +#define BASE40_cow 37403 // "cow" +#define BASE40__cow 1496159 // "/cow" +#define BASE40_zero 989026 // "zero" +#define BASE40__zero 39561079 // "/zero" + + +#include +#include + +typedef unsigned long int u64; + +// Uppercase mapped to lowercase +// All unexpected characters mapped to '.' (table value 38) +// - = 0x2D . = 0x2E / = 0x2F +// Base40 characters are _abcdefghijklmnopqrstuvwxyz0123456789-./ +// 0 1 2 3 +// 0123456789012345678901234567890123456789 +// where the first is NUL. 
+static const char kToBase40[256] = { + 0,38,38,38, 38,38,38,38, 38,38,38,38, 38,38,38,38, + 38,38,38,38, 38,38,38,38, 38,38,38,38, 38,38,38,38, + 38,38,38,38, 38,38,38,38, 38,38,38,38, 38,37,38,39, + 27,28,29,30, 31,32,33,34, 35,36,38,38, 38,38,38,38, + + 38, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11, 12,13,14,15, + 16,17,18,19, 20,21,22,23, 24,25,26,38, 38,38,38,38, + 38, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11, 12,13,14,15, + 16,17,18,19, 20,21,22,23, 24,25,26,38, 38,38,38,38, + + 38,38,38,38, 38,38,38,38, 38,38,38,38, 38,38,38,38, + 38,38,38,38, 38,38,38,38, 38,38,38,38, 38,38,38,38, + 38,38,38,38, 38,38,38,38, 38,38,38,38, 38,38,38,38, + 38,38,38,38, 38,38,38,38, 38,38,38,38, 38,38,38,38, + + 38,38,38,38, 38,38,38,38, 38,38,38,38, 38,38,38,38, + 38,38,38,38, 38,38,38,38, 38,38,38,38, 38,38,38,38, + 38,38,38,38, 38,38,38,38, 38,38,38,38, 38,38,38,38, + 38,38,38,38, 38,38,38,38, 38,38,38,38, 38,38,38,38, +}; + +static const char kFromBase40[40] = { + '\0','a','b','c', 'd','e','f','g', 'h','i','j','k', 'l','m','n','o', + 'p','q','r','s', 't','u','v','w', 'x','y','z','0', '1','2','3','4', + '5','6','7','8', '9','-','.','/', +}; + +// Unpack six characters from 32 bits. +// str must be 8 bytes. We somewhat-arbitrarily capitalize the first letter +char* Base40ToChar(u64 base40, char* str) { + base40 &= 0x00000000fffffffflu; // Just low 32 bits + memset(str, 0, 8); + bool first_letter = true; + // First character went in last, comes out first + int i = 0; + while (base40 > 0) { + u64 n40 = base40 % 40; + str[i] = kFromBase40[n40]; + base40 /= 40; + if (first_letter && (1 <= n40) && (n40 <= 26)) { + str[i] &= ~0x20; // Uppercase it + first_letter = false; + } + ++i; + } + return str; +} + +// Pack six characters into 32 bits. 
Only use a-zA-Z0-9.-/ +u64 CharToBase40(const char* str) { + int len = strlen(str); + // If longer than 6 characters, take only the first 6 + if (len > 6) {len = 6;} + u64 base40 = 0; + // First character goes in last, comes out first + for (int i = len - 1; i >= 0; -- i) { + base40 = (base40 * 40) + kToBase40[str[i]]; + } + return base40; +} + + + +static const int kBuffersize = 128; +// Allowed in C variable names; others replaced with underscore +static const char* kAllowed = "_ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; + +int main (int argc, const char** argv) { + char buffer[kBuffersize]; + char buffer2[kBuffersize]; + while (fgets(buffer, kBuffersize, stdin) != NULL) { + if (buffer[strlen(buffer) - 1] == '\n') {buffer[strlen(buffer) - 1] = '\0';} + if (buffer[strlen(buffer) - 1] == '\r') {buffer[strlen(buffer) - 1] = '\0';} + for (int i = 0; i < strlen(buffer); ++i) { + buffer2[i] = (strchr(kAllowed, buffer[i]) != NULL) ? buffer[i] : '_'; + } + buffer2[strlen(buffer)] = '\0'; + //printf("%ld // %s\n", CharToBase40(buffer), buffer); + printf("#define BASE40_%s %ld\t// \"%s\"\n", buffer2, CharToBase40(buffer), buffer); + } + + return 0; +} + diff --git a/book-user-code/basetypes.h b/book-user-code/basetypes.h new file mode 100644 index 000000000000..045685b50943 --- /dev/null +++ b/book-user-code/basetypes.h @@ -0,0 +1,59 @@ +// Base types to use thoughout class +// Copyright 2021 Richard L. 
#ifndef __BASETYPES_H__
#define __BASETYPES_H__

// Fixed-width integer types (int8_t ... uint32_t) used by the typedefs below
#include <stdint.h>

// Short names for the fixed-width integer types
typedef int8_t int8;
typedef uint8_t uint8;
typedef int16_t int16;
typedef uint16_t uint16;
typedef int32_t int32;
typedef uint32_t uint32;

// Per-architecture 64-bit types plus helper macros:
//   FUINTPTRX  printf format for an unsigned long printed as zero-padded hex
//              (presumably for pointer-sized values, going by the name)
//   FLX        printf format for a 64-bit value as 16 hex digits
//   FLD        printf format for a 64-bit value in decimal
//   CL(x)      appends the signed 64-bit literal suffix to x
//   CLU(x)     appends the unsigned 64-bit literal suffix to x

#ifdef __ARM_ARCH_ISA_ARM

typedef long long int int64;
typedef long long unsigned int uint64;
#define FUINTPTRX "%08lx"
#define FLX "%016llx"
#define FLD "%lld"
#define CL(x) x##LL
#define CLU(x) x##LLU

#elif defined(__aarch64__)
typedef long long int int64;
typedef long long unsigned int uint64;
#define FUINTPTRX "%016lx"
#define FLX "%016llx"
#define FLD "%lld"
#define CL(x) x##LL
#define CLU(x) x##LLU

#elif defined(__x86_64)
/* make almost the same as ARM-32 */
typedef long long int int64;
typedef long long unsigned int uint64;
#define FUINTPTRX "%016lx"
#define FLX "%016llx"
#define FLD "%lld"
#define CL(x) x##LL
#define CLU(x) x##LLU

#elif 0
/* actual 64-bit types */
/* Disabled alternative that uses long instead of long long */
typedef long int int64;
typedef long unsigned int uint64;
#define FUINTPTRX "%016lx"
#define FLX "%016lx"
#define FLD "%ld"
#define CL(x) x##L
#define CLU(x) x##LU

#else
#error Need type defines for your architecture
#endif

#endif	// __BASETYPES_H__
Sites +// +// compile with g++ -O2 -pthread client4.cc dclab_log.cc dclab_rpc.cc kutrace_lib.cc -o client4 +// + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "basetypes.h" +#include "dclab_log.h" +#include "dclab_rpc.h" +#include "kutrace_lib.h" +#include "polynomial.h" +#include "spinlock.h" +#include "timecounters.h" + +// Cheap globals for local statistics and logging + +// Response time in usec histogram buckets of floor lg usec +static uint32 hist[32]; +static int64 rpc_count; +static int64 total_usec; +static int64 txbytes; +static int64 rxbytes; + +static bool verbose; +static FILE* logfile; +static uint32 server_ipnum; +static uint16 server_portnum; + +// Global for fast sink. Build value once and then reuse +static string sink_value; + +inline uint32 NextRand(uint32* seed) { + *seed = POLYSHIFT32(*seed); + return *seed; +} + +void WaitMsec(int32 msec) { + struct timespec tv; + tv.tv_sec = msec / 1000; + tv.tv_nsec = (msec % 1000) * 1000000; + nanosleep(&tv, NULL); +} + + +int ConnectToServer(const char* server_name, const char* server_port, + uint32* server_ipnum, uint16* server_portnum) { + struct addrinfo hints; + struct addrinfo* server; + int sockfd; + int iret; + + // First, load up address structs with getaddrinfo(): + memset(&hints, 0, sizeof hints); + hints.ai_family = AF_INET; // IPv4 + hints.ai_socktype = SOCK_STREAM; // TCP + iret = getaddrinfo(server_name, server_port, &hints, &server); + if (iret != 0) {Error("getaddrinfo", gai_strerror(iret));} + + // Make a socket: + sockfd = socket(server->ai_family, server->ai_socktype, server->ai_protocol); + if (sockfd < 0) {Error("socket");} + + // Connect + iret = connect(sockfd, server->ai_addr, server->ai_addrlen); + if (iret < 0) {Error("connect");} + const sockaddr_in* sin = reinterpret_cast(server->ai_addr); + fprintf(stderr, "at client, server IP = %08x:%04x\n", ntohl(sin->sin_addr.s_addr), ntohs(sin->sin_port)); + 
// Increment a NUL-terminated string in place, odometer style.
// Working from the last character toward the first:
//   '9' wraps to '0', 'z' wraps to 'a', 'Z' wraps to 'A', and anything above
//   0x7e wraps to 0x21; each wrap carries into the character to its left.
//   Any other character is bumped by one and the increment is finished.
// The string length never changes. If every character wraps (such as 9999 to
// 0000) the carry out of the first character is dropped. All is OK.
void IncrString(char* s) {
  int len = strlen(s);
  // Step toward the front of the string; the index must decrease so that the
  // carry moves left and the loop ends at the first character.
  for (int i = len - 1; i >= 0; --i) {
    char c = s[i];
    if (c == '9') {s[i] = '0';}
    else if (c == 'z') {s[i] = 'a';}
    else if (c == 'Z') {s[i] = 'A';}
    else if (c > 0x7e) {s[i] = 0x21;}
    else {s[i] += 1; return;}
  }
}
// Send one RPC request to the server and block until its response arrives.
//
//   sockfd          connected socket to the server
//   randseed        pseudo-random state; advanced by the padding calls and
//                   once more to produce the RPC id
//   command         method name; at most 8 bytes are copied into the header
//   key_base_str    base text of the key, padded with key_padlen extra
//                   characters by PadToStr
//   value_base_str  base text of the value, padded with value_padlen extra
//                   characters by PadToStr
//
// For the "sink" command the padded value is built only on the first call and
// then reused from the global sink_value.
// Side effects: logs both RPCs to logfile, emits KUTRACE request/response
// events, prints the first 20 round-trip times, and updates the global
// statistics (hist, rpc_count, total_usec, txbytes, rxbytes).
// Returns the AND of the SendRequest and ReceiveResponse results.
bool SendCommand(int sockfd, uint32* randseed, const char* command,
                 string& key_base_str, int key_padlen,
                 string& value_base_str, int value_padlen) {
  bool ok = true;
  string value;
  string* val;
  // Expand string values as needed
  string key = key_base_str;
  PadToStr(randseed, key_padlen, &key);
  // Cache sink_value after first time, for speed
  bool sink_command = (strcmp(command, "sink") == 0);
  if ((sink_value.empty()) || !sink_command) {
    // Build a fresh padded value; remember it if this is the first sink
    val = &value;
    value = value_base_str;
    PadToStr(randseed, value_padlen, &value);
    if (sink_command) {sink_value = value;}
  } else {
    // Later sink commands reuse the cached value unchanged
    val = &sink_value;
  }

  // Build request header, setting RPCID
  RPCHeader rpcheader;
  memset(&rpcheader, 0, sizeof(RPCHeader));
  rpcheader.type = ReqSendType;
  rpcheader.server_ip = server_ipnum;
  rpcheader.server_port = server_portnum;
  rpcheader.rpcid = NextRand(randseed);
  rpcheader.parent = 0;
  // NOTE(review): strncpy writes no terminator when command is 8 or more
  // bytes; presumably method is a fixed 8-byte field -- confirm in dclab_rpc.h
  strncpy(&rpcheader.method[0], command, 8);

  // The request points at the stack-resident header; only its data is heap
  RPC request;
  memset(&request, 0, sizeof(RPC));
  request.header = &rpcheader;
  request.headerlen = sizeof(RPCHeader);

  // Attach key and/or value as the request data; neither means no data
  if ((key.size() > 0) && (val->size() > 0)) {
    PutStringRPC2(key, *val, &request);
  } else if (key.size() > 0) {
    PutStringRPC(key, &request);
  } else if (val->size() > 0) {
    PutStringRPC(*val, &request);
  }

  uint8 lglen8 = TenLg(request.datalen);
  request.header->lglen1 = lglen8;  // Request length
  request.header->type = ReqSendType;
  request.header->req_send_timestamp = GetUsec();	// T1

  // Trace the outgoing RPC request
  // RPCid is pseudo-random 32 bits, but never zero. If low 16 bits are zero, send high 16 bits.
  uint32 tempid = rpcid32_to_rpcid16(request.header->rpcid);

  // rls 2020.08.23 record the method name for each outgoing RPC
  // We also pack in the request length
  kutrace::addname(KUTRACE_METHODNAME, tempid, request.header->method);

  // Start tracing the outgoing RPC request
  // Event argument is lglen8 in bits 16 and up, 16-bit RPC id in the low bits
  kutrace::addevent(KUTRACE_RPCIDREQ, (lglen8 << 16) | tempid);

  if (verbose) {fprintf(stdout, "client4: SendRequest: "); PrintRPC(stdout, &request);}
  LogRPC(logfile, &request);


  ok &= SendRequest(sockfd, &request);

  // Stop tracing the outgoing RPC request
  kutrace::addevent(KUTRACE_RPCIDREQ, 0);

  // Block here until the response comes back
  RPC response;
  ok &= ReceiveResponse(sockfd, &response);

  int64 resp_rcv_time = GetUsec();		// T4
  response.header->resp_rcv_timestamp = resp_rcv_time;
  response.header->type = RespRcvType;

  // Trace the incoming RPC response
  // RPCid is pseudo-random 32 bits, but never zero. If low 16 bits are zero, use high 16 bits.
  tempid = rpcid32_to_rpcid16(response.header->rpcid);
  lglen8 = response.header->lglen2;  // Response length

  // Start tracing the incoming RPC response
  kutrace::addevent(KUTRACE_RPCIDRESP, (lglen8 << 16) | tempid);

  if (verbose) {fprintf(stdout, "client4: ReceiveResponse: "); PrintRPC(stdout, &response);}
  LogRPC(logfile, &response);
  // Round trip: receive time minus the send timestamp found in the response header
  int64 elapsed = resp_rcv_time - response.header->req_send_timestamp;

  // Print first 20 round-trip times in msec
  if (rpc_count < 20) {
    fprintf(stdout, "%5.3fms ", elapsed / 1000.0);
    if ((rpc_count % 10) == 9) {fprintf(stdout, "\n");}
  }
  // gather some simple statistics
  int32 usec = elapsed;
  ++hist[FloorLg(usec)];
  ++rpc_count;
  total_usec += elapsed;
  txbytes += sizeof(RPCMarker) + request.headerlen + request.datalen;
  rxbytes += sizeof(RPCMarker) + response.headerlen + response.datalen;

  PrintResponse(stdout, &response);
  // The request header is the local rpcheader, so only the request data is freed
  FreeRPCDataOnly(&request);
  FreeRPC(&response);

  // Stop tracing the incoming RPC response
  kutrace::addevent(KUTRACE_RPCIDRESP, 0);

  return ok;
}
// client4 entry point.
// argv[1] is the server name and argv[2] the server port; the remaining
// arguments select the command and its options (see Usage()).
// Steps: parse arguments, check that the chosen command has the key/value it
// needs, open the log file, seed the pseudo-random generator, connect, send
// the command outer_repeats * inner_repeats times, then print a response-time
// histogram and throughput summary on stderr.
// Returns EXIT_SUCCESS; argument errors exit through Usage().
int main (int argc, const char** argv) {
  // Command-line argument variables and their defaults
  int32 outer_repeats = 1;
  int32 inner_repeats = 1;
  int32 wait_msec = 0;
  const char* command = NULL;	// command is required
  const char* key_base = "";
  const char* value_base = "";
  int key_padlen = 0;
  int value_padlen = 0;
  bool key_incr = false;
  bool value_incr = false;
  verbose = false;
  uint32 randseed = 1;
  bool seed1 = false;		// If true, set seed to 1 every time for repeatability
  sink_value.clear();

  // Get the command-line arguments
  // Server name as text is argv[1] and server port as text is argv[2]. Start here looking at [3]
  if (argc < 4) { Usage(); }

  for (int i = 3; i < argc; ++i) {
    // Options that take a value consume the following argument via the extra ++i
    if(strcmp(argv[i], "-rep") == 0) {if (i + 1 < argc) {outer_repeats = atoi(argv[i + 1]); ++i;}}
    else if(strcmp(argv[i], "-k") == 0) {if (i + 1 < argc) {inner_repeats = atoi(argv[i + 1]); ++i;}}
    else if(strcmp(argv[i], "-key") == 0) {
      // -key "keybase" [+] [padlen]
      // NOTE(review): the padlen test accepts any following token that does
      // not start with '-', so a bare word placed right after the key is
      // consumed here as padlen rather than seen as the command.
      if (i + 1 < argc) {key_base = argv[i + 1]; ++i;}
      if ((i + 1 < argc) && (argv[i + 1][0] == '+')) {key_incr = true; ++i;}
      if ((i + 1 < argc) && (argv[i + 1][0] != '-')) {key_padlen = atoi(argv[i + 1]); ++i;}
    }
    else if(strcmp(argv[i], "-value") == 0) {
      // -value "valuebase" [+] [padlen], parsed the same way as -key
      if (i + 1 < argc) {value_base = argv[i + 1]; ++i;}
      if ((i + 1 < argc) && (argv[i + 1][0] == '+')) {value_incr = true; ++i;}
      if ((i + 1 < argc) && (argv[i + 1][0] != '-')) {value_padlen = atoi(argv[i + 1]); ++i;}
    }
    else if(strcmp(argv[i], "-waitms") == 0) {if (i + 1 < argc) {wait_msec = atoi(argv[i + 1]); ++i;}}
    else if (strcmp(argv[i], "-verbose") == 0) {verbose = true;}
    else if (strcmp(argv[i], "-seed1") == 0) {seed1 = true;}
    // Bare word is command if we haven't seen one yet
    else if ((argv[i][0] != '-') && (command == NULL)) {command = argv[i];}
    else {fprintf(stderr, "Bad token at argv[%d] %s\n", i, argv[i]); Usage();}
  }

  // Check that the command has the key and/or value it requires
  if (command == NULL) {fprintf(stderr, "No command\n"); Usage();}
  if ((strcmp(command, "read") == 0) && (key_base[0] =='\0')) {fprintf(stderr, "Missing -key for read\n"); Usage();}
  if ((strcmp(command, "write") == 0) && (key_base[0] =='\0')) {fprintf(stderr, "Missing -key for write\n"); Usage();}
  if ((strcmp(command, "sink") == 0) && (key_base[0] =='\0')) {fprintf(stderr, "Missing -key for sink\n"); Usage();}
  if ((strcmp(command, "delete") == 0) && (key_base[0] =='\0')) {fprintf(stderr, "Missing -key for delete\n"); Usage();}
  if ((strcmp(command, "write") == 0) && (value_base[0] =='\0')) {fprintf(stderr, "Missing -value for write\n"); Usage();}
  if ((strcmp(command, "sink") == 0) && (value_base[0] =='\0')) {fprintf(stderr, "Missing -value for sink\n"); Usage();}

  // fprintf(stdout, "outer %d, inner %d, wait %d, cmd %s, key %s, val %s, kpad %d, vpad %d, kincr %d, vincr %d, verbose %d\n",
  //         outer_repeats, inner_repeats, wait_msec, command, key_base, value_base, key_padlen, value_padlen, key_incr, value_incr, verbose);

  // Initialize globals for local statistics
  memset(hist, 0, sizeof(hist));
  rpc_count = 0;
  total_usec = 0;
  txbytes = 0;
  rxbytes = 0;

  // The log file name is derived from the program name in argv[0]
  const char* logfilename = MakeLogFileName(argv[0]);
  logfile = OpenLogFileOrDie(logfilename);

  // Initialize pseudo-random generator based on process id and time
  uint32 pid = getpid();
  randseed = time(NULL) ^ (pid << 16) ;	// Different seed each run
  if (randseed == 0) {randseed = POLYINIT32;}	// Safety move to avoid accidental seed=0
  if (seed1) {randseed = 1;}

  // Copy key_base and value_base so they can be incremented
  string key_base_str = string(key_base);
  string value_base_str = string(value_base);
  bool ok = true;
  bool sink_command = (strcmp(command, "sink") == 0);

  int sockfd = ConnectToServer(argv[1], argv[2], &server_ipnum, &server_portnum);

  // The double-nested command loop
  // Inner loop sends back-to-back; the wait happens once per outer iteration.
  // The SendCommand return value is not examined here.
  for (int i = 0; i < outer_repeats; ++i) {
    if (sink_command) {kutrace::mark_d(value_padlen + i);}
    for (int j = 0; j < inner_repeats; ++j) {
      SendCommand(sockfd, &randseed, command, key_base_str, key_padlen, value_base_str, value_padlen);
      if (key_incr) {IncrString(&key_base_str);}
      if (value_incr) {IncrString(&value_base_str);}
    }
    WaitMsec(wait_msec);
  }

  close(sockfd);


  // Print some summary statistics
  fprintf(stderr, "\n");
  fprintf(stderr, "Histogram of floor log 2 buckets of usec response times\n");
  fprintf(stderr, "1 2+ 4+ us 1+ 2+ 4+ msec 1+ 2+ 4+ sec 1K+ 2k+ secs\n");
  fprintf(stderr, "| | | |\n");
  // One count per bucket, with an extra gap after every tenth bucket
  for (int i = 0; i < 32; ++i) {
    fprintf(stderr, "%d ", hist[i]);
    if ((i % 10) == 9) {fprintf(stderr, " ");}
  }
  fprintf(stderr, "\n");
  fprintf(stderr, "%lld RPCs, %5.1f msec, %5.3f TxMB, %5.3f RxMB total\n",
          rpc_count, total_usec / 1000.0, txbytes / 1000000.0, rxbytes / 1000000.0);
  // Rates use total_usec, the sum of the measured round-trip times;
  // bytes per usec is the same number as MB per second
  fprintf(stderr,
          "%5.1f RPC/s (%5.3f msec/RPC), %5.1f TxMB/s, %5.1f RxMB/s\n",
          (rpc_count * 1000000.0) / total_usec,
          (total_usec * 0.001) / rpc_count,
          (txbytes * 1.0) / total_usec,
          (rxbytes * 1.0) / total_usec);
  fprintf(stderr, "\n");

  fclose(logfile);
  fprintf(stderr, "%s written\n", logfilename);


  return EXIT_SUCCESS;
}
samptoname_u +g++ -O2 -pthread schedtest.cc kutrace_lib.cc -o schedtest +g++ -O2 -pthread server4.cc dclab_log.cc dclab_rpc.cc kutrace_lib.cc spinlock.cc -o server4 +g++ -O2 -pthread server_disk.cc dclab_log.cc dclab_rpc.cc kutrace_lib.cc spinlock_fixed.cc -o server_disk +g++ -O2 -pthread server_mystery21.cc dclab_log.cc dclab_rpc.cc kutrace_lib.cc spinlock_fixed.cc -o server_mystery21 +g++ -O2 spantospan.cc -o spantospan +g++ -O2 spantoprof.cc -o spantoprof +g++ -O2 spantotrim.cc from_base40.cc -o spantotrim +g++ -O2 timealign.cc dclab_log.cc dclab_rpc.cc kutrace_lib.cc -o timealign +g++ -O2 time_getpid.cc kutrace_lib.cc -o time_getpid +g++ -O2 unmakeself.cc -o unmakeself +g++ -O2 whetstone_ku.c kutrace_lib.cc -lm -o whetstone_ku + + diff --git a/book-user-code/cpuhog_highissue.cc b/book-user-code/cpuhog_highissue.cc new file mode 100644 index 000000000000..afddc432f425 --- /dev/null +++ b/book-user-code/cpuhog_highissue.cc @@ -0,0 +1,83 @@ +/* Bastardized from Jenkins hash (subset aligned 32-bit words) */ +/* http://www.burtleburtle.net/bob/hash/doobs.html */ +/* Chosen only because it fills up issue slots without much memory activity */ +/* If you want a modern hash, look into murmur hash */ +// Copyright 2021 Richard L. 
typedef unsigned int uint32;

/* Jenkins-style mixing step: scrambles the three 32-bit state words in place */
#define mix(a,b,c) \
do { \
  a -= b; a -= c; a ^= (c>>13); \
  b -= c; b -= a; b ^= (a<<8); \
  c -= a; c -= b; c ^= (b>>13); \
  a -= b; a -= c; a ^= (c>>12);  \
  b -= c; b -= a; b ^= (a<<16); \
  c -= a; c -= b; c ^= (b>>5); \
  a -= b; a -= c; a ^= (c>>3);  \
  b -= c; b -= a; b ^= (a<<10); \
  c -= a; c -= b; c ^= (b>>15); \
} while (0)

/* Hash an array of 32-bit words, three words (12 bytes) at a time */
/*   s        the words to hash                                     */
/*   length   count of 32-bit words; a trailing one or two words   */
/*            that do not fill a group of three are ignored        */
/*   initval  the previous hash value, used as the starting state  */
/* Returns the third state word after all full groups are mixed in */
uint32 hash(uint32* s, uint32 length, uint32 initval) {
  /* Internal state: two copies of the golden ratio constant (an arbitrary value) plus the seed */
  uint32 a = 0x9e3779b9;
  uint32 b = 0x9e3779b9;
  uint32 c = initval;

  /* Walk the complete three-word groups in order */
  const uint32 groups = length / 3;
  for (uint32 g = 0; g < groups; ++g) {
    const uint32* w = s + (3 * g);
    a += w[0];
    b += w[1];
    c += w[2];
    mix(a,b,c);
  }

  return c;
}
/* Calculate bogus work: a chain of 2000 dependent double divides */
/* Each of the 1000 rounds divides by a value just above 1.0 and then by */
/* a value just below 1.0, so the result stays close to initval */
double boguscalc(double initval) {
  double value = initval;
  int rounds_left = 1000;
  while (rounds_left-- > 0) {
    value /= 1.000000001;
    value /= 0.999999999;
  }
  return value;
}
Sites + +#include +#include /* gettimeofday */ + +typedef unsigned long int uint64; +typedef unsigned int uint32; + +/* Count is chosen to run main loop about 4 minutes */ +static const int kLOOPCOUNT = 200 * 10000; + +/* Size is chosen to fit into a little less thsan 256KB */ +static const int kSIZE = 64 * 960; /* 4-byte words */ + + +/* Return time of day in usec */ +uint64 gettime() { + struct timeval tv; + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000000L) + tv.tv_usec; +} + +#define mix(a,b,c) \ +{ \ + a -= b; a -= c; a ^= (c>>13); \ + b -= c; b -= a; b ^= (a<<8); \ + c -= a; c -= b; c ^= (b>>13); \ + a -= b; a -= c; a ^= (c>>12); \ + b -= c; b -= a; b ^= (a<<16); \ + c -= a; c -= b; c ^= (b>>5); \ + a -= b; a -= c; a ^= (c>>3); \ + b -= c; b -= a; b ^= (a<<10); \ + c -= a; c -= b; c ^= (b>>15); \ +} + +/* Calculate a hash over s, some multiple of 12 bytes long */ +/* Length is count of 32-bit words */ +uint32 hash(uint32* s, uint32 length, uint32 initval) { + uint32 a,b,c,len; + + /* Set up the internal state */ + len = length; + a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */ + c = initval; /* the previous hash value */ + + /*---------------------------------------- handle most of the string */ + while (len >= 3) + { + a += s[0] ; + b += s[1]; + c += s[2]; + mix(a,b,c); + s += 3; len -= 3; + } + /*-------------------------------------------- report the result */ + return c; +} + + +/* Set up to run for about 4-5 minutes */ +int main (int argc, const char** argv) { + /* Simple arbitrary initialization */ + uint32 foo[kSIZE]; /* A little less than 4KB */ + for (int i = 0; i < kSIZE; ++i) {foo[i] = (i & 1023) * 1041667;} + + /* Main loop */ + uint32 hashval = 0; + uint64 start = gettime(); + for (int i = 0; i < kLOOPCOUNT; ++i) { + hashval = hash(foo, kSIZE, hashval); + } + uint64 elapsed = gettime() - start; + + /* Make sure hashval is live */ + fprintf(stdout, "elapsed usec %ld, hashval = %08X\n", elapsed, hashval); + return 0; +} + + 
diff --git a/book-user-code/d3.v4.min.js b/book-user-code/d3.v4.min.js new file mode 100644 index 000000000000..4224fcf3c38a --- /dev/null +++ b/book-user-code/d3.v4.min.js @@ -0,0 +1,8 @@ +// https://d3js.org Version 4.8.0. Copyright 2017 Mike Bostock. +(function(t,n){"object"==typeof exports&&"undefined"!=typeof module?n(exports):"function"==typeof define&&define.amd?define(["exports"],n):n(t.d3=t.d3||{})})(this,function(t){"use strict";function n(t){return function(n,e){return Rs(t(n),e)}}function e(t,n){return[t,n]}function r(t,n,e){var r=(n-t)/Math.max(0,e),i=Math.floor(Math.log(r)/Math.LN10),o=r/Math.pow(10,i);return i>=0?(o>=Qs?10:o>=Ks?5:o>=tf?2:1)*Math.pow(10,i):-Math.pow(10,-i)/(o>=Qs?10:o>=Ks?5:o>=tf?2:1)}function i(t,n,e){var r=Math.abs(n-t)/Math.max(0,e),i=Math.pow(10,Math.floor(Math.log(r)/Math.LN10)),o=r/i;return o>=Qs?i*=10:o>=Ks?i*=5:o>=tf&&(i*=2),n=0&&(e=t.slice(r+1),t=t.slice(0,r)),t&&!n.hasOwnProperty(t))throw new Error("unknown type: "+t);return{type:t,name:e}})}function y(t,n){for(var e,r=0,i=t.length;r=0&&(n=t.slice(e+1),t=t.slice(0,e)),{type:t,name:n}})}function k(t){return function(){var n=this.__on;if(n){for(var e,r=0,i=-1,o=n.length;rn?1:t>=n?0:NaN}function U(t){return function(){this.removeAttribute(t)}}function D(t){return function(){this.removeAttributeNS(t.space,t.local)}}function O(t,n){return function(){this.setAttribute(t,n)}}function F(t,n){return function(){this.setAttributeNS(t.space,t.local,n)}}function I(t,n){return function(){var e=n.apply(this,arguments);null==e?this.removeAttribute(t):this.setAttribute(t,e)}}function Y(t,n){return function(){var e=n.apply(this,arguments);null==e?this.removeAttributeNS(t.space,t.local):this.setAttributeNS(t.space,t.local,e)}}function B(t){return function(){this.style.removeProperty(t)}}function j(t,n,e){return function(){this.style.setProperty(t,n,e)}}function H(t,n,e){return function(){var r=n.apply(this,arguments);null==r?this.style.removeProperty(t):this.style.setProperty(t,r,e)}}function 
X(t){return function(){delete this[t]}}function V(t,n){return function(){this[t]=n}}function $(t,n){return function(){var e=n.apply(this,arguments);null==e?delete this[t]:this[t]=e}}function W(t){return t.trim().split(/^|\s+/)}function Z(t){return t.classList||new G(t)}function G(t){this._node=t,this._names=W(t.getAttribute("class")||"")}function J(t,n){for(var e=Z(t),r=-1,i=n.length;++r>8&15|n>>4&240,n>>4&15|240&n,(15&n)<<4|15&n,1)):(n=ql.exec(t))?Nt(parseInt(n[1],16)):(n=Ul.exec(t))?new Ct(n[1],n[2],n[3],1):(n=Dl.exec(t))?new Ct(255*n[1]/100,255*n[2]/100,255*n[3]/100,1):(n=Ol.exec(t))?kt(n[1],n[2],n[3],n[4]):(n=Fl.exec(t))?kt(255*n[1]/100,255*n[2]/100,255*n[3]/100,n[4]):(n=Il.exec(t))?zt(n[1],n[2]/100,n[3]/100,1):(n=Yl.exec(t))?zt(n[1],n[2]/100,n[3]/100,n[4]):Bl.hasOwnProperty(t)?Nt(Bl[t]):"transparent"===t?new Ct(NaN,NaN,NaN,0):null}function Nt(t){return new Ct(t>>16&255,t>>8&255,255&t,1)}function kt(t,n,e,r){return r<=0&&(t=n=e=NaN),new Ct(t,n,e,r)}function Et(t){return t instanceof Tt||(t=St(t)),t?(t=t.rgb(),new Ct(t.r,t.g,t.b,t.opacity)):new Ct}function At(t,n,e,r){return 1===arguments.length?Et(t):new Ct(t,n,e,null==r?1:r)}function Ct(t,n,e,r){this.r=+t,this.g=+n,this.b=+e,this.opacity=+r}function zt(t,n,e,r){return r<=0?t=n=e=NaN:e<=0||e>=1?t=n=NaN:n<=0&&(t=NaN),new Rt(t,n,e,r)}function Pt(t){if(t instanceof Rt)return new Rt(t.h,t.s,t.l,t.opacity);if(t instanceof Tt||(t=St(t)),!t)return new Rt;if(t instanceof Rt)return t;t=t.rgb();var n=t.r/255,e=t.g/255,r=t.b/255,i=Math.min(n,e,r),o=Math.max(n,e,r),u=NaN,a=o-i,c=(o+i)/2;return a?(u=n===o?(e-r)/a+6*(e0&&c<1?0:u,new Rt(u,a,c,t.opacity)}function Lt(t,n,e,r){return 1===arguments.length?Pt(t):new Rt(t,n,e,null==r?1:r)}function Rt(t,n,e,r){this.h=+t,this.s=+n,this.l=+e,this.opacity=+r}function qt(t,n,e){return 255*(t<60?n+(e-n)*t/60:t<180?e:t<240?n+(e-n)*(240-t)/60:n)}function Ut(t){if(t instanceof Ot)return new Ot(t.l,t.a,t.b,t.opacity);if(t instanceof Xt){var n=t.h*jl;return new 
Ot(t.l,Math.cos(n)*t.c,Math.sin(n)*t.c,t.opacity)}t instanceof Ct||(t=Et(t));var e=Bt(t.r),r=Bt(t.g),i=Bt(t.b),o=Ft((.4124564*e+.3575761*r+.1804375*i)/Xl),u=Ft((.2126729*e+.7151522*r+.072175*i)/Vl);return new Ot(116*u-16,500*(o-u),200*(u-Ft((.0193339*e+.119192*r+.9503041*i)/$l)),t.opacity)}function Dt(t,n,e,r){return 1===arguments.length?Ut(t):new Ot(t,n,e,null==r?1:r)}function Ot(t,n,e,r){this.l=+t,this.a=+n,this.b=+e,this.opacity=+r}function Ft(t){return t>Jl?Math.pow(t,1/3):t/Gl+Wl}function It(t){return t>Zl?t*t*t:Gl*(t-Wl)}function Yt(t){return 255*(t<=.0031308?12.92*t:1.055*Math.pow(t,1/2.4)-.055)}function Bt(t){return(t/=255)<=.04045?t/12.92:Math.pow((t+.055)/1.055,2.4)}function jt(t){if(t instanceof Xt)return new Xt(t.h,t.c,t.l,t.opacity);t instanceof Ot||(t=Ut(t));var n=Math.atan2(t.b,t.a)*Hl;return new Xt(n<0?n+360:n,Math.sqrt(t.a*t.a+t.b*t.b),t.l,t.opacity)}function Ht(t,n,e,r){return 1===arguments.length?jt(t):new Xt(t,n,e,null==r?1:r)}function Xt(t,n,e,r){this.h=+t,this.c=+n,this.l=+e,this.opacity=+r}function Vt(t){if(t instanceof Wt)return new Wt(t.h,t.s,t.l,t.opacity);t instanceof Ct||(t=Et(t));var n=t.r/255,e=t.g/255,r=t.b/255,i=(oh*r+rh*n-ih*e)/(oh+rh-ih),o=r-i,u=(eh*(e-i)-th*o)/nh,a=Math.sqrt(u*u+o*o)/(eh*i*(1-i)),c=a?Math.atan2(u,o)*Hl-120:NaN;return new Wt(c<0?c+360:c,a,i,t.opacity)}function $t(t,n,e,r){return 1===arguments.length?Vt(t):new Wt(t,n,e,null==r?1:r)}function Wt(t,n,e,r){this.h=+t,this.s=+n,this.l=+e,this.opacity=+r}function Zt(t,n,e,r,i){var o=t*t,u=o*t;return((1-3*t+3*o-u)*n+(4-6*o+3*u)*e+(1+3*t+3*o-3*u)*r+u*i)/6}function Gt(t,n){return function(e){return t+e*n}}function Jt(t,n,e){return t=Math.pow(t,e),n=Math.pow(n,e)-t,e=1/e,function(r){return Math.pow(t+r*n,e)}}function Qt(t,n){var e=n-t;return e?Gt(t,e>180||e<-180?e-360*Math.round(e/360):e):dh(isNaN(t)?n:t)}function Kt(t){return 1==(t=+t)?tn:function(n,e){return e-n?Jt(n,e,t):dh(isNaN(n)?e:n)}}function tn(t,n){var e=n-t;return e?Gt(t,e):dh(isNaN(t)?n:t)}function nn(t){return 
function(n){var e,r,i=n.length,o=new Array(i),u=new Array(i),a=new Array(i);for(e=0;e180?n+=360:n-t>180&&(t+=360),o.push({i:e.push(i(e)+"rotate(",null,r)-2,x:xh(t,n)})):n&&e.push(i(e)+"rotate("+n+r)}function a(t,n,e,o){t!==n?o.push({i:e.push(i(e)+"skewX(",null,r)-2,x:xh(t,n)}):n&&e.push(i(e)+"skewX("+n+r)}function c(t,n,e,r,o,u){if(t!==e||n!==r){var a=o.push(i(o)+"scale(",null,",",null,")");u.push({i:a-4,x:xh(t,e)},{i:a-2,x:xh(n,r)})}else 1===e&&1===r||o.push(i(o)+"scale("+e+","+r+")")}return function(n,e){var r=[],i=[];return n=t(n),e=t(e),o(n.translateX,n.translateY,e.translateX,e.translateY,r,i),u(n.rotate,e.rotate,r,i),a(n.skewX,e.skewX,r,i),c(n.scaleX,n.scaleY,e.scaleX,e.scaleY,r,i),n=e=null,function(t){for(var n,e=-1,o=i.length;++e=0&&n._call.call(null,t),n=n._next;--Yh}function xn(){Vh=(Xh=Wh.now())+$h,Yh=Bh=0;try{mn()}finally{Yh=0,wn(),Vh=0}}function bn(){var t=Wh.now(),n=t-Xh;n>Hh&&($h-=n,Xh=t)}function wn(){for(var t,n,e=fh,r=1/0;e;)e._call?(r>e._time&&(r=e._time),t=e,e=e._next):(n=e._next,e._next=null,e=t?t._next=n:fh=n);lh=t,Mn(r)}function Mn(t){if(!Yh){Bh&&(Bh=clearTimeout(Bh));var n=t-Vh;n>24?(t<1/0&&(Bh=setTimeout(xn,n)),jh&&(jh=clearInterval(jh))):(jh||(Xh=Vh,jh=setInterval(bn,Hh)),Yh=1,Zh(xn))}}function Tn(t,n){var e=t.__transition;if(!e||!(e=e[n])||e.state>tp)throw new Error("too late");return e}function Sn(t,n){var e=t.__transition;if(!e||!(e=e[n])||e.state>ep)throw new Error("too late");return e}function Nn(t,n){var e=t.__transition;if(!e||!(e=e[n]))throw new Error("too late");return e}function kn(t,n,e){function r(t){e.state=np,e.timer.restart(i,e.delay,e.time),e.delay<=t&&i(t-e.delay)}function i(r){var s,f,l,h;if(e.state!==np)return u();for(s in c)if(h=c[s],h.name===e.name){if(h.state===rp)return Gh(i);h.state===ip?(h.state=up,h.timer.stop(),h.on.call("interrupt",t,t.__data__,h.index,h.group),delete c[s]):+s=0&&(t=t.slice(0,n)),!t||"start"===t})}function Xn(t,n,e){var r,i,o=Hn(n)?Tn:Sn;return function(){var 
u=o(this,t),a=u.on;a!==r&&(i=(r=a).copy()).on(n,e),u.on=i}}function Vn(t){return function(){var n=this.parentNode;for(var e in this.__transition)if(+e!==t)return;n&&n.removeChild(this)}}function $n(t,n){var e,r,i;return function(){var o=cl(this).getComputedStyle(this,null),u=o.getPropertyValue(t),a=(this.style.removeProperty(t),o.getPropertyValue(t));return u===a?null:u===e&&a===r?i:i=n(e=u,r=a)}}function Wn(t){return function(){this.style.removeProperty(t)}}function Zn(t,n,e){var r,i;return function(){var o=cl(this).getComputedStyle(this,null).getPropertyValue(t);return o===e?null:o===r?i:i=n(r=o,e)}}function Gn(t,n,e){var r,i,o;return function(){var u=cl(this).getComputedStyle(this,null),a=u.getPropertyValue(t),c=e(this);return null==c&&(this.style.removeProperty(t),c=u.getPropertyValue(t)),a===c?null:a===r&&c===i?o:o=n(r=a,i=c)}}function Jn(t,n,e){function r(){var r=this,i=n.apply(r,arguments);return i&&function(n){r.style.setProperty(t,i(n),e)}}return r._value=n,r}function Qn(t){return function(){this.textContent=t}}function Kn(t){return function(){var n=t(this);this.textContent=null==n?"":n}}function te(t,n,e,r){this._groups=t,this._parents=n,this._name=e,this._id=r}function ne(t){return _t().transition(t)}function ee(){return++Ap}function re(t){return+t}function ie(t){return t*t}function oe(t){return t*(2-t)}function ue(t){return((t*=2)<=1?t*t:--t*(2-t)+1)/2}function ae(t){return t*t*t}function ce(t){return--t*t*t+1}function se(t){return((t*=2)<=1?t*t*t:(t-=2)*t*t+2)/2}function fe(t){return 1-Math.cos(t*qp)}function le(t){return Math.sin(t*qp)}function he(t){return(1-Math.cos(Rp*t))/2}function pe(t){return Math.pow(2,10*t-10)}function de(t){return 1-Math.pow(2,-10*t)}function ve(t){return((t*=2)<=1?Math.pow(2,10*t-10):2-Math.pow(2,10-10*t))/2}function _e(t){return 1-Math.sqrt(1-t*t)}function ge(t){return Math.sqrt(1- --t*t)}function ye(t){return((t*=2)<=1?1-Math.sqrt(1-t*t):Math.sqrt(1-(t-=2)*t)+1)/2}function me(t){return 1-xe(1-t)}function 
xe(t){return(t=+t)Math.abs(t[1]-O[1])?M=!0:w=!0),O=t,b=!0,od(),o()}function o(){var t;switch(m=O[0]-D[0],x=O[1]-D[1],N){case ad:case ud:k&&(m=Math.max(P-l,Math.min(R-v,m)),h=l+m,_=v+m),E&&(x=Math.max(L-p,Math.min(q-g,x)),d=p+x,y=g+x);break;case cd:k<0?(m=Math.max(P-l,Math.min(R-l,m)),h=l+m,_=v):k>0&&(m=Math.max(P-v,Math.min(R-v,m)),h=l,_=v+m),E<0?(x=Math.max(L-p,Math.min(q-p,x)),d=p+x,y=g):E>0&&(x=Math.max(L-g,Math.min(q-g,x)),d=p,y=g+x);break;case sd:k&&(h=Math.max(P,Math.min(R,l-m*k)),_=Math.max(P,Math.min(R,v+m*k))),E&&(d=Math.max(L,Math.min(q,p-x*E)),y=Math.max(L,Math.min(q,g+x*E)))}_0&&(l=h-m),E<0?g=y-x:E>0&&(p=d-x),N=ad,Y.attr("cursor",pd.selection),o());break;default:return}od()}function s(){switch(t.event.keyCode){case 16:U&&(w=M=U=!1,o());break;case 18:N===sd&&(k<0?v=_:k>0&&(l=h),E<0?g=y:E>0&&(p=d),N=cd,o());break;case 32:N===ad&&(t.event.altKey?(k&&(v=_-m*k,l=h+m*k),E&&(g=y-x*E,p=d+x*E),N=sd):(k<0?v=_:k>0&&(l=h),E<0?g=y:E>0&&(p=d),N=cd),Y.attr("cursor",pd[S]),o());break;default:return}od()}if(t.event.touches){if(t.event.changedTouches.length=(o=(v+g)/2))?v=o:g=o,(f=e>=(u=(_+y)/2))?_=u:y=u,i=p,!(p=p[l=f<<1|s]))return i[l]=d,t;if(a=+t._x.call(null,p.data),c=+t._y.call(null,p.data),n===a&&e===c)return d.next=p,i?i[l]=d:t._root=d,t;do{i=i?i[l]=new Array(4):t._root=new Array(4),(s=n>=(o=(v+g)/2))?v=o:g=o,(f=e>=(u=(_+y)/2))?_=u:y=u}while((l=f<<1|s)==(h=(c>=u)<<1|a>=o));return i[h]=p,i[l]=d,t}function Ke(t){var n,e,r,i,o=t.length,u=new Array(o),a=new Array(o),c=1/0,s=1/0,f=-1/0,l=-1/0;for(e=0;ef&&(f=r),il&&(l=i));for(f",i=n[3]||"-",o=n[4]||"",u=!!n[5],a=n[6]&&+n[6],c=!!n[7],s=n[8]&&+n[8].slice(1),f=n[9]||"";"n"===f?(c=!0, +f="g"):kv[f]||(f=""),(u||"0"===e&&"="===r)&&(u=!0,e="0",r="="),this.fill=e,this.align=r,this.sign=i,this.symbol=o,this.zero=u,this.width=a,this.comma=c,this.precision=s,this.type=f}function dr(n){return Av=Pv(n),t.format=Av.format,t.formatPrefix=Av.formatPrefix,Av}function vr(){this.reset()}function _r(t,n,e){var 
r=t.s=n+e,i=r-n,o=r-i;t.t=n-o+(e-i)}function gr(t){return t>1?0:t<-1?__:Math.acos(t)}function yr(t){return t>1?g_:t<-1?-g_:Math.asin(t)}function mr(t){return(t=C_(t/2))*t}function xr(){}function br(t,n){t&&q_.hasOwnProperty(t.type)&&q_[t.type](t,n)}function wr(t,n,e){var r,i=-1,o=t.length-e;for(n.lineStart();++i=0?1:-1,i=r*e,o=S_(n),u=C_(n),a=Yv*u,c=Iv*o+a*S_(i),s=a*r*C_(i);D_.add(T_(s,c)),Fv=t,Iv=o,Yv=u}function Er(t){return[T_(t[1],t[0]),yr(t[2])]}function Ar(t){var n=t[0],e=t[1],r=S_(e);return[r*S_(n),r*C_(n),C_(e)]}function Cr(t,n){return t[0]*n[0]+t[1]*n[1]+t[2]*n[2]}function zr(t,n){return[t[1]*n[2]-t[2]*n[1],t[2]*n[0]-t[0]*n[2],t[0]*n[1]-t[1]*n[0]]}function Pr(t,n){t[0]+=n[0],t[1]+=n[1],t[2]+=n[2]}function Lr(t,n){return[t[0]*n,t[1]*n,t[2]*n]}function Rr(t){var n=P_(t[0]*t[0]+t[1]*t[1]+t[2]*t[2]);t[0]/=n,t[1]/=n,t[2]/=n}function qr(t,n){Gv.push(Jv=[Bv=t,Hv=t]),nXv&&(Xv=n)}function Ur(t,n){var e=Ar([t*b_,n*b_]);if(Zv){var r=zr(Zv,e),i=[r[1],-r[0],0],o=zr(i,r);Rr(o),o=Er(o);var u,a=t-Vv,c=a>0?1:-1,s=o[0]*x_*c,f=w_(a)>180;f^(c*VvXv&&(Xv=u):(s=(s+360)%360-180,f^(c*VvXv&&(Xv=n))),f?tBr(Bv,Hv)&&(Hv=t):Br(t,Hv)>Br(Bv,Hv)&&(Bv=t):Hv>=Bv?(tHv&&(Hv=t)):t>Vv?Br(Bv,t)>Br(Bv,Hv)&&(Hv=t):Br(t,Hv)>Br(Bv,Hv)&&(Bv=t)}else Gv.push(Jv=[Bv=t,Hv=t]);nXv&&(Xv=n),Zv=e,Vv=t}function Dr(){B_.point=Ur}function Or(){Jv[0]=Bv,Jv[1]=Hv,B_.point=qr,Zv=null}function Fr(t,n){if(Zv){var e=t-Vv;Y_.add(w_(e)>180?e+(e>0?360:-360):e)}else $v=t,Wv=n;F_.point(t,n),Ur(t,n)}function Ir(){F_.lineStart()}function Yr(){Fr($v,Wv),F_.lineEnd(),w_(Y_)>v_&&(Bv=-(Hv=180)),Jv[0]=Bv,Jv[1]=Hv,Zv=null}function Br(t,n){return(n-=t)<0?n+360:n}function jr(t,n){return t[0]-n[0]}function Hr(t,n){return t[0]<=t[1]?t[0]<=n&&n<=t[1]:n__?t-m_:t<-__?t+m_:t,n]}function ei(t,n,e){return(t%=m_)?n||e?$_(ii(t),oi(n,e)):ii(t):n||e?oi(n,e):ni}function ri(t){return function(n,e){return n+=t,[n>__?n-m_:n<-__?n+m_:n,e]}}function ii(t){var n=ri(t);return n.invert=ri(-t),n}function oi(t,n){function e(t,n){var 
e=S_(n),a=S_(t)*e,c=C_(t)*e,s=C_(n),f=s*r+a*i;return[T_(c*o-f*u,a*r-s*i),yr(f*o+c*u)]}var r=S_(t),i=C_(t),o=S_(n),u=C_(n);return e.invert=function(t,n){var e=S_(n),a=S_(t)*e,c=C_(t)*e,s=C_(n),f=s*o-c*u;return[T_(c*o+s*u,a*r+f*i),yr(f*r-a*i)]},e}function ui(t,n,e,r,i,o){if(e){var u=S_(n),a=C_(n),c=r*e;null==i?(i=n+r*m_,o=n-c/2):(i=ai(u,i),o=ai(u,o),(r>0?io)&&(i+=r*m_));for(var s,f=i;r>0?f>o:f0)do{s.point(0===f||3===f?t:e,f>1?r:n)}while((f=(f+a+4)%4)!==l);else s.point(o[0],o[1])}function u(r,i){return w_(r[0]-t)0?0:3:w_(r[0]-e)0?2:1:w_(r[1]-n)0?1:0:i>0?3:2}function a(t,n){return c(t.x,n.x)}function c(t,n){var e=u(t,1),r=u(n,1);return e!==r?e-r:0===e?n[1]-t[1]:1===e?t[0]-n[0]:2===e?t[1]-n[1]:n[0]-t[0]}return function(u){function c(t,n){i(t,n)&&N.point(t,n)}function s(){for(var n=0,e=0,i=_.length;er&&(l-o)*(r-u)>(h-u)*(t-o)&&++n:h<=r&&(l-o)*(r-u)<(h-u)*(t-o)&&--n;return n}function f(){N=k,v=[],_=[],S=!0}function l(){var t=s(),n=S&&t,e=(v=lf(v)).length;(n||e)&&(u.polygonStart(),n&&(u.lineStart(),o(null,null,1,u),u.lineEnd()),e&&fg(v,a,t,o,u),u.polygonEnd()),N=u,v=_=g=null}function h(){E.point=d,_&&_.push(g=[]),T=!0,M=!1,b=w=NaN}function p(){v&&(d(y,m),x&&M&&k.rejoin(),v.push(k.result())),E.point=c,M&&N.lineEnd()}function d(o,u){var a=i(o,u);if(_&&g.push([o,u]),T)y=o,m=u,x=a,T=!1,a&&(N.lineStart(),N.point(o,u));else if(a&&M)N.point(o,u);else{var c=[b=Math.max(hg,Math.min(lg,b)),w=Math.max(hg,Math.min(lg,w))],s=[o=Math.max(hg,Math.min(lg,o)),u=Math.max(hg,Math.min(lg,u))];cg(c,s,t,n,e,r)?(M||(N.lineStart(),N.point(c[0],c[1])),N.point(s[0],s[1]),a||N.lineEnd(),S=!1):a&&(N.lineStart(),N.point(o,u),S=!1)}b=o,w=u,M=a}var v,_,g,y,m,x,b,w,M,T,S,N=u,k=ag(),E={point:c,lineStart:h,lineEnd:p,polygonStart:f,polygonEnd:l};return E}}function li(){gg.point=pi,gg.lineEnd=hi}function hi(){gg.point=gg.lineEnd=xr}function pi(t,n){t*=b_,n*=b_,W_=t,Z_=C_(n),G_=S_(n),gg.point=di}function di(t,n){t*=b_,n*=b_;var 
e=C_(n),r=S_(n),i=w_(t-W_),o=S_(i),u=C_(i),a=r*u,c=G_*e-Z_*r*o,s=Z_*e+G_*r*o;_g.add(T_(P_(a*a+c*c),s)),W_=t,Z_=e,G_=r}function vi(t,n){return!(!t||!Mg.hasOwnProperty(t.type))&&Mg[t.type](t,n)}function _i(t,n){return 0===bg(t,n)}function gi(t,n){var e=bg(t[0],t[1]);return bg(t[0],n)+bg(n,t[1])<=e+v_}function yi(t,n){return!!vg(t.map(mi),xi(n))}function mi(t){return t=t.map(xi),t.pop(),t}function xi(t){return[t[0]*b_,t[1]*b_]}function bi(t,n,e){var r=Js(t,n-v_,e).concat(n);return function(t){return r.map(function(n){return[t,n]})}}function wi(t,n,e){var r=Js(t,n-v_,e).concat(n);return function(t){return r.map(function(n){return[n,t]})}}function Mi(){function t(){return{type:"MultiLineString",coordinates:n()}}function n(){return Js(N_(o/_)*_,i,_).map(h).concat(Js(N_(s/g)*g,c,g).map(p)).concat(Js(N_(r/d)*d,e,d).filter(function(t){return w_(t%_)>v_}).map(f)).concat(Js(N_(a/v)*v,u,v).filter(function(t){return w_(t%g)>v_}).map(l))}var e,r,i,o,u,a,c,s,f,l,h,p,d=10,v=d,_=90,g=360,y=2.5;return t.lines=function(){return n().map(function(t){return{type:"LineString",coordinates:t}})},t.outline=function(){return{type:"Polygon",coordinates:[h(o).concat(p(c).slice(1),h(i).reverse().slice(1),p(s).reverse().slice(1))]}},t.extent=function(n){return arguments.length?t.extentMajor(n).extentMinor(n):t.extentMinor()},t.extentMajor=function(n){return arguments.length?(o=+n[0][0],i=+n[1][0],s=+n[0][1],c=+n[1][1],o>i&&(n=o,o=i,i=n),s>c&&(n=s,s=c,c=n),t.precision(y)):[[o,s],[i,c]]},t.extentMinor=function(n){return arguments.length?(r=+n[0][0],e=+n[1][0],a=+n[0][1],u=+n[1][1],r>e&&(n=r,r=e,e=n),a>u&&(n=a,a=u,u=n),t.precision(y)):[[r,a],[e,u]]},t.step=function(n){return arguments.length?t.stepMajor(n).stepMinor(n):t.stepMinor()},t.stepMajor=function(n){return arguments.length?(_=+n[0],g=+n[1],t):[_,g]},t.stepMinor=function(n){return arguments.length?(d=+n[0],v=+n[1],t):[d,v]},t.precision=function(n){return 
arguments.length?(y=+n,f=bi(a,u,90),l=wi(r,e,y),h=bi(s,c,90),p=wi(o,i,y),t):y},t.extentMajor([[-180,-90+v_],[180,90-v_]]).extentMinor([[-180,-80-v_],[180,80+v_]])}function Ti(){return Mi()()}function Si(){Ag.point=Ni}function Ni(t,n){Ag.point=ki,J_=K_=t,Q_=tg=n}function ki(t,n){Eg.add(tg*t-K_*n),K_=t,tg=n}function Ei(){ki(J_,Q_)}function Ai(t,n){tPg&&(Pg=t),nLg&&(Lg=n)}function Ci(t,n){qg+=t,Ug+=n,++Dg}function zi(){Hg.point=Pi}function Pi(t,n){Hg.point=Li,Ci(rg=t,ig=n)}function Li(t,n){var e=t-rg,r=n-ig,i=P_(e*e+r*r);Og+=i*(rg+t)/2,Fg+=i*(ig+n)/2,Ig+=i,Ci(rg=t,ig=n)}function Ri(){Hg.point=Ci}function qi(){Hg.point=Di}function Ui(){Oi(ng,eg)}function Di(t,n){Hg.point=Oi,Ci(ng=rg=t,eg=ig=n)}function Oi(t,n){var e=t-rg,r=n-ig,i=P_(e*e+r*r);Og+=i*(rg+t)/2,Fg+=i*(ig+n)/2,Ig+=i,i=ig*t-rg*n,Yg+=i*(rg+t),Bg+=i*(ig+n),jg+=3*i,Ci(rg=t,ig=n)}function Fi(t){this._context=t}function Ii(t,n){Jg.point=Yi,Vg=Wg=t,$g=Zg=n}function Yi(t,n){Wg-=t,Zg-=n,Gg.add(P_(Wg*Wg+Zg*Zg)),Wg=t,Zg=n}function Bi(){this._string=[]}function ji(t){return"m0,"+t+"a"+t+","+t+" 0 1,1 0,"+-2*t+"a"+t+","+t+" 0 1,1 0,"+2*t+"z"}function Hi(t){return t.length>1}function Xi(t,n){return((t=t.x)[0]<0?t[1]-g_-v_:g_-t[1])-((n=n.x)[0]<0?n[1]-g_-v_:g_-n[1])}function Vi(t){var n,e=NaN,r=NaN,i=NaN;return{lineStart:function(){t.lineStart(),n=1},point:function(o,u){var a=o>0?__:-__,c=w_(o-e);w_(c-__)0?g_:-g_),t.point(i,r),t.lineEnd(),t.lineStart(),t.point(a,r),t.point(o,r),n=0):i!==a&&c>=__&&(w_(e-i)v_?M_((C_(n)*(o=S_(r))*C_(e)-C_(r)*(i=S_(n))*C_(t))/(i*o*u)):(n+r)/2}function Wi(t,n,e,r){var i;if(null==t)i=e*g_,r.point(-__,i),r.point(0,i),r.point(__,i),r.point(__,0),r.point(__,-i),r.point(0,-i),r.point(-__,-i),r.point(-__,0),r.point(-__,i);else if(w_(t[0]-n[0])>v_){var o=t[0]4*n&&v--){var x=u+h,b=a+p,w=c+d,M=P_(x*x+b*b+w*w),T=yr(w/=M),S=w_(w_(w)-1)n||w_((g*A+y*C)/m-.5)>.3||u*h+a*p+c*d2?t[2]%360*b_:0,i()):[b*x_,w*x_,M*x_]},n.precision=function(t){return 
arguments.length?(A=oy(r,E=t*t),o()):P_(E)},n.fitExtent=function(t,e){return Ji(n,t,e)},n.fitSize=function(t,e){return Qi(n,t,e)},function(){return u=t.apply(this,arguments),n.invert=u.invert&&e,i()}}function ro(t){var n=0,e=__/3,r=eo(t),i=r(n,e);return i.parallels=function(t){return arguments.length?r(n=t[0]*b_,e=t[1]*b_):[n*x_,e*x_]},i}function io(t){function n(t,n){return[t*e,C_(n)/e]}var e=S_(t);return n.invert=function(t,n){return[t/e,yr(n*e)]},n}function oo(t,n){function e(t,n){var e=P_(o-2*i*C_(n))/i;return[e*C_(t*=i),u-e*S_(t)]}var r=C_(t),i=(r+C_(n))/2;if(w_(i)0?n<-g_+v_&&(n=-g_+v_):n>g_-v_&&(n=g_-v_);var e=o/A_(lo(n),i);return[e*C_(i*t),o-e*S_(i*t)]}var r=S_(t),i=t===n?C_(t):E_(r/S_(n))/E_(lo(n)/lo(t)),o=r*A_(lo(t),i)/i;return i?(e.invert=function(t,n){var e=o-n,r=z_(i)*P_(t*t+e*e);return[T_(t,w_(e))/i*z_(e),2*M_(A_(o/r,1/i))-g_]},e):so}function po(t,n){return[t,n]}function vo(t,n){function e(t,n){var e=o-n,r=i*t;return[e*C_(r),o-e*S_(r)]}var r=S_(t),i=t===n?C_(t):(r-S_(n))/(n-t),o=r/i+t;return w_(i)=0;)n+=e[r].value;else n=1;t.value=n}function Ao(t,n){if(t===n)return t;var e=t.ancestors(),r=n.ancestors(),i=null;for(t=e.pop(),n=r.pop();t===n;)i=t,t=e.pop(),n=r.pop();return i}function Co(t,n){var e,r,i,o,u,a=new qo(t),c=+t.value&&(a.value=t.value),s=[a];for(null==n&&(n=Po);e=s.pop();)if(c&&(e.value=+e.data.value),(i=n(e.data))&&(u=i.length))for(e.children=new Array(u),o=u-1;o>=0;--o)s.push(r=e.children[o]=new qo(i[o])),r.parent=e,r.depth=e.depth+1;return a.eachBefore(Ro)}function zo(){return Co(this).eachBefore(Lo)}function Po(t){return t.children}function Lo(t){t.data=t.data.data}function Ro(t){var n=0;do{t.height=n}while((t=t.parent)&&t.height<++n)}function qo(t){this.data=t,this.depth=this.height=0,this.parent=null}function Uo(t){this._=t,this.next=null}function Do(t,n){var e=n.x-t.x,r=n.y-t.y,i=t.r-n.r;return i*i+1e-6>e*e+r*r}function Oo(t,n){var e,r,i,o=null,u=t.head;switch(n.length){case 1:e=Fo(n[0]);break;case 2:e=Io(n[0],n[1]);break;case 
3:e=Yo(n[0],n[1],n[2])}for(;u;)i=u._,r=u.next,e&&Do(e,i)?o=u:(o?(t.tail=o,o.next=null):t.head=t.tail=null,n.push(i),e=Oo(t,n),n.pop(),t.head?(u.next=t.head,t.head=u):(u.next=null,t.head=t.tail=u),o=t.tail,o.next=r),u=r;return t.tail=o,e}function Fo(t){return{x:t.x,y:t.y,r:t.r}}function Io(t,n){var e=t.x,r=t.y,i=t.r,o=n.x,u=n.y,a=n.r,c=o-e,s=u-r,f=a-i,l=Math.sqrt(c*c+s*s);return{x:(e+o+c/l*f)/2,y:(r+u+s/l*f)/2,r:(l+i+a)/2}}function Yo(t,n,e){var r=t.x,i=t.y,o=t.r,u=n.x,a=n.y,c=n.r,s=e.x,f=e.y,l=e.r,h=2*(r-u),p=2*(i-a),d=2*(c-o),v=r*r+i*i-o*o-u*u-a*a+c*c,_=2*(r-s),g=2*(i-f),y=2*(l-o),m=r*r+i*i-o*o-s*s-f*f+l*l,x=_*p-h*g,b=(p*m-g*v)/x-r,w=(g*d-p*y)/x,M=(_*v-h*m)/x-i,T=(h*y-_*d)/x,S=w*w+T*T-1,N=2*(b*w+M*T+o),k=b*b+M*M-o*o,E=(-N-Math.sqrt(N*N-4*S*k))/(2*S);return{x:b+w*E+r,y:M+T*E+i,r:E}}function Bo(t,n,e){var r=t.x,i=t.y,o=n.r+e.r,u=t.r+e.r,a=n.x-r,c=n.y-i,s=a*a+c*c;if(s){var f=.5+((u*=u)-(o*=o))/(2*s),l=Math.sqrt(Math.max(0,2*o*(u+s)-(u-=s)*u-o*o))/(2*s);e.x=r+f*a+l*c,e.y=i+f*c-l*a}else e.x=r+u,e.y=i}function jo(t,n){var e=n.x-t.x,r=n.y-t.y,i=t.r+n.r;return i*i-1e-6>e*e+r*r}function Ho(t,n,e){var r=t._,i=t.next._,o=r.r+i.r,u=(r.x*i.r+i.x*r.r)/o-n,a=(r.y*i.r+i.y*r.r)/o-e;return u*u+a*a}function Xo(t){this._=t,this.next=null,this.previous=null}function Vo(t){if(!(i=t.length))return 0;var n,e,r,i;if(n=t[0],n.x=0,n.y=0,!(i>1))return n.r;if(e=t[1],n.x=-e.r,e.x=n.r,e.y=0,!(i>2))return n.r+e.r;Bo(e,n,r=t[2]);var o,u,a,c,s,f,l,h=n.r*n.r,p=e.r*e.r,d=r.r*r.r,v=h+p+d,_=h*n.x+p*e.x+d*r.x,g=h*n.y+p*e.y+d*r.y;n=new Xo(n),e=new Xo(e),r=new Xo(r),n.next=r.previous=e,e.next=n.previous=r,r.next=e.previous=n;t:for(a=3;a=0;)n=i[o],n.z+=e,n.m+=e,e+=n.s+(r+=n.c)}function au(t,n,e){return t.a.parent===n.parent?t.a:e}function cu(t,n){this._=t,this.parent=null,this.children=null,this.A=null,this.a=this,this.z=0,this.m=0,this.c=0,this.s=0,this.t=null,this.i=n}function su(t){for(var n,e,r,i,o,u=new cu(t,0),a=[u];n=a.pop();)if(r=n._.children)for(n.children=new 
Array(o=r.length),i=o-1;i>=0;--i)a.push(e=n.children[i]=new cu(r[i],i)),e.parent=n;return(u.parent=new cu(null,0)).children=[u],u}function fu(t,n,e,r,i,o){for(var u,a,c,s,f,l,h,p,d,v,_,g=[],y=n.children,m=0,x=0,b=y.length,w=n.value;mh&&(h=a),_=f*f*v,(p=Math.max(h/_,_/l))>d){f-=a;break}d=p}g.push(u={value:f,dice:c1&&rm(t[e[r-2]],t[e[r-1]],t[i])<=0;)--r;e[r++]=i}return e.slice(0,r)}function pu(t){if(!(t>=1))throw new Error;this._size=t,this._call=this._error=null,this._tasks=[],this._data=[],this._waiting=this._active=this._ended=this._start=0}function du(t){if(!t._start)try{vu(t)}catch(n){if(t._tasks[t._ended+t._active-1])gu(t,n);else if(!t._data)throw n}}function vu(t){for(;t._start=t._waiting&&t._active=0;)if((e=t._tasks[r])&&(t._tasks[r]=null,e.abort))try{e.abort()}catch(n){}t._active=NaN,yu(t)}function yu(t){if(!t._active&&t._call){var n=t._data;t._data=void 0,t._call(t._error,n)}}function mu(t){return new pu(arguments.length?+t:1/0)}function xu(t){return function(n,e){t(null==n?e:null)}}function bu(t){var n=t.responseType;return n&&"text"!==n?t.response:t.responseText}function wu(t,n){return function(e){return t(e.responseText,n)}}function Mu(t){function n(n){var o=n+"",u=e.get(o);if(!u){if(i!==km)return i;e.set(o,u=r.push(n))}return t[(u-1)%t.length]}var e=Be(),r=[],i=km;return t=null==t?[]:Nm.call(t),n.domain=function(t){if(!arguments.length)return r.slice();r=[],e=Be();for(var i,o,u=-1,a=t.length;++u=e?1:r(t)}}}function Au(t){return function(n,e){var r=t(n=+n,e=+e);return function(t){return t<=0?n:t>=1?e:r(t)}}}function Cu(t,n,e,r){var i=t[0],o=t[1],u=n[0],a=n[1];return o2?zu:Cu,o=u=null,r}function r(n){return(o||(o=i(a,c,f?Eu(t):t,s)))(+n)}var i,o,u,a=Cm,c=Cm,s=Sh,f=!1;return r.invert=function(t){return(u||(u=i(c,a,ku,f?Au(n):n)))(+t)},r.domain=function(t){return arguments.length?(a=Sm.call(t,Am),e()):a.slice()},r.range=function(t){return arguments.length?(c=Nm.call(t),e()):c.slice()},r.rangeRound=function(t){return 
c=Nm.call(t),s=Nh,e()},r.clamp=function(t){return arguments.length?(f=!!t,e()):f},r.interpolate=function(t){return arguments.length?(s=t,e()):s},e()}function Ru(t){var n=t.domain;return t.ticks=function(t){var e=n();return nf(e[0],e[e.length-1],null==t?10:t)},t.tickFormat=function(t,e){return zm(n(),t,e)},t.nice=function(e){var r=n(),o=r.length-1,u=null==e?10:e,a=r[0],c=r[o],s=i(a,c,u);return s&&(s=i(Math.floor(a/s)*s,Math.ceil(c/s)*s,u),r[0]=Math.floor(a/s)*s,r[o]=Math.ceil(c/s)*s,n(r)),t},t}function qu(){var t=Lu(ku,xh);return t.copy=function(){return Pu(t,qu())},Ru(t)}function Uu(){function t(t){return+t}var n=[0,1];return t.invert=t,t.domain=t.range=function(e){return arguments.length?(n=Sm.call(e,Am),t):n.slice()},t.copy=function(){return Uu().domain(n)},Ru(t)}function Du(t,n){return(n=Math.log(n/t))?function(e){return Math.log(e/t)/n}:Em(n)}function Ou(t,n){return t<0?function(e){return-Math.pow(-n,e)*Math.pow(-t,1-e)}:function(e){return Math.pow(n,e)*Math.pow(t,1-e)}}function Fu(t){return isFinite(t)?+("1e"+t):t<0?0:t}function Iu(t){return 10===t?Fu:t===Math.E?Math.exp:function(n){return Math.pow(t,n)}}function Yu(t){return t===Math.E?Math.log:10===t&&Math.log10||2===t&&Math.log2||(t=Math.log(t),function(n){return Math.log(n)/t})}function Bu(t){return function(n){return-t(-n)}}function ju(){function n(){return o=Yu(i),u=Iu(i),r()[0]<0&&(o=Bu(o),u=Bu(u)),e}var e=Lu(Du,Ou).domain([1,10]),r=e.domain,i=10,o=Yu(10),u=Iu(10);return e.base=function(t){return arguments.length?(i=+t,n()):i},e.domain=function(t){return arguments.length?(r(t),n()):r()},e.ticks=function(t){var n,e=r(),a=e[0],c=e[e.length-1];(n=c0){for(;hc)break;v.push(l)}}else for(;h=1;--f)if(!((l=s*f)c)break;v.push(l)}}else v=nf(h,p,Math.min(p-h,d)).map(u);return n?v.reverse():v},e.tickFormat=function(n,r){if(null==r&&(r=10===i?".0e":","),"function"!=typeof r&&(r=t.format(r)),n===1/0)return r;null==n&&(n=10);var a=Math.max(1,i*n/e.ticks().length);return function(t){var n=t/u(Math.round(o(t)));return 
n*i0?i[n-1]:e[0],n=i?[o[i-1],r]:[o[n-1],o[n]]},t.copy=function(){return Wu().domain([e,r]).range(u)},Ru(t)}function Zu(){function t(t){if(t<=t)return e[Ds(n,t,0,r)]}var n=[.5],e=[0,1],r=1;return t.domain=function(i){return arguments.length?(n=Nm.call(i),r=Math.min(n.length,e.length-1),t):n.slice()},t.range=function(i){return arguments.length?(e=Nm.call(i),r=Math.min(n.length,e.length-1),t):e.slice()},t.invertExtent=function(t){var r=e.indexOf(t);return[n[r-1],n[r]]},t.copy=function(){return Zu().domain(n).range(e)},t}function Gu(t,n,e,r){function i(n){return t(n=new Date(+n)),n}return i.floor=i,i.ceil=function(e){return t(e=new Date(e-1)),n(e,1),t(e),e},i.round=function(t){var n=i(t),e=i.ceil(t);return t-n0))return u;do{u.push(new Date(+e))}while(n(e,o),t(e),e=n)for(;t(n),!e(n);)n.setTime(n-1)},function(t,r){if(t>=t)for(;--r>=0;)for(;n(t,1),!e(t););})},e&&(i.count=function(n,r){return Lm.setTime(+n),Rm.setTime(+r),t(Lm),t(Rm),Math.floor(e(Lm,Rm))},i.every=function(t){return t=Math.floor(t),isFinite(t)&&t>0?t>1?i.filter(r?function(n){return r(n)%t==0}:function(n){return i.count(0,n)%t==0}):i:null}),i}function Ju(t){return Gu(function(n){n.setDate(n.getDate()-(n.getDay()+7-t)%7),n.setHours(0,0,0,0)},function(t,n){t.setDate(t.getDate()+7*n)},function(t,n){return(n-t-(n.getTimezoneOffset()-t.getTimezoneOffset())*Dm)/Om})}function Qu(t){return Gu(function(n){n.setUTCDate(n.getUTCDate()-(n.getUTCDay()+7-t)%7),n.setUTCHours(0,0,0,0)},function(t,n){t.setUTCDate(t.getUTCDate()+7*n)},function(t,n){return(n-t)/Om})}function Ku(t){if(0<=t.y&&t.y<100){var n=new Date(-1,t.m,t.d,t.H,t.M,t.S,t.L);return n.setFullYear(t.y),n}return new Date(t.y,t.m,t.d,t.H,t.M,t.S,t.L)}function ta(t){if(0<=t.y&&t.y<100){var n=new Date(Date.UTC(-1,t.m,t.d,t.H,t.M,t.S,t.L));return n.setUTCFullYear(t.y),n}return new Date(Date.UTC(t.y,t.m,t.d,t.H,t.M,t.S,t.L))}function na(t){return{y:t,m:0,d:1,H:0,M:0,S:0,L:0}}function ea(t){function n(t,n){return function(e){var r,i,o,u=[],a=-1,c=0,s=t.length;for(e 
instanceof Date||(e=new Date(+e));++a=c)return-1;if(37===(i=n.charCodeAt(u++))){if(i=n.charAt(u++),!(o=B[i in Ux?n.charAt(u++):i])||(r=o(t,e,r))<0)return-1}else if(i!=e.charCodeAt(r++))return-1}return r}function i(t,n,e){var r=C.exec(n.slice(e));return r?(t.p=z[r[0].toLowerCase()],e+r[0].length):-1}function o(t,n,e){var r=R.exec(n.slice(e));return r?(t.w=q[r[0].toLowerCase()],e+r[0].length):-1}function u(t,n,e){var r=P.exec(n.slice(e));return r?(t.w=L[r[0].toLowerCase()],e+r[0].length):-1}function a(t,n,e){var r=O.exec(n.slice(e));return r?(t.m=F[r[0].toLowerCase()],e+r[0].length):-1}function c(t,n,e){var r=U.exec(n.slice(e));return r?(t.m=D[r[0].toLowerCase()],e+r[0].length):-1}function s(t,n,e){return r(t,w,n,e)}function f(t,n,e){return r(t,M,n,e)}function l(t,n,e){return r(t,T,n,e)}function h(t){return k[t.getDay()]}function p(t){return N[t.getDay()]}function d(t){return A[t.getMonth()]}function v(t){return E[t.getMonth()]}function _(t){return S[+(t.getHours()>=12)]}function g(t){return k[t.getUTCDay()]}function y(t){return N[t.getUTCDay()]}function m(t){return A[t.getUTCMonth()]}function x(t){return E[t.getUTCMonth()]}function b(t){return S[+(t.getUTCHours()>=12)]}var w=t.dateTime,M=t.date,T=t.time,S=t.periods,N=t.days,k=t.shortDays,E=t.months,A=t.shortMonths,C=oa(S),z=ua(S),P=oa(N),L=ua(N),R=oa(k),q=ua(k),U=oa(E),D=ua(E),O=oa(A),F=ua(A),I={a:h,A:p,b:d,B:v,c:null,d:ba,e:ba,H:wa,I:Ma,j:Ta,L:Sa,m:Na,M:ka,p:_,S:Ea,U:Aa,w:Ca,W:za,x:null,X:null,y:Pa,Y:La,Z:Ra,"%":Za},Y={a:g,A:y,b:m,B:x,c:null,d:qa,e:qa,H:Ua,I:Da,j:Oa,L:Fa,m:Ia,M:Ya,p:b,S:Ba,U:ja,w:Ha,W:Xa,x:null,X:null,y:Va,Y:$a,Z:Wa,"%":Za},B={a:o,A:u,b:a,B:c,c:s,d:da,e:da,H:_a,I:_a,j:va,L:ma,m:pa,M:ga,p:i,S:ya,U:ca,w:aa,W:sa,x:f,X:l,y:la,Y:fa,Z:ha,"%":xa};return I.x=n(M,I),I.X=n(T,I),I.c=n(w,I),Y.x=n(M,Y),Y.X=n(T,Y),Y.c=n(w,Y),{format:function(t){var e=n(t+="",I);return e.toString=function(){return t},e},parse:function(t){var n=e(t+="",Ku);return n.toString=function(){return t},n},utcFormat:function(t){var 
e=n(t+="",Y);return e.toString=function(){return t},e},utcParse:function(t){var n=e(t,ta);return n.toString=function(){return t},n}}}function ra(t,n,e){var r=t<0?"-":"",i=(r?-t:t)+"",o=i.length;return r+(o68?1900:2e3),e+r[0].length):-1}function ha(t,n,e){var r=/^(Z)|([+-]\d\d)(?:\:?(\d\d))?/.exec(n.slice(e,e+6));return r?(t.Z=r[1]?0:-(r[2]+(r[3]||"00")),e+r[0].length):-1}function pa(t,n,e){var r=Dx.exec(n.slice(e,e+2));return r?(t.m=r[0]-1,e+r[0].length):-1}function da(t,n,e){var r=Dx.exec(n.slice(e,e+2));return r?(t.d=+r[0],e+r[0].length):-1}function va(t,n,e){var r=Dx.exec(n.slice(e,e+3));return r?(t.m=0,t.d=+r[0],e+r[0].length):-1}function _a(t,n,e){var r=Dx.exec(n.slice(e,e+2));return r?(t.H=+r[0],e+r[0].length):-1}function ga(t,n,e){var r=Dx.exec(n.slice(e,e+2));return r?(t.M=+r[0],e+r[0].length):-1}function ya(t,n,e){var r=Dx.exec(n.slice(e,e+2));return r?(t.S=+r[0],e+r[0].length):-1}function ma(t,n,e){var r=Dx.exec(n.slice(e,e+3));return r?(t.L=+r[0],e+r[0].length):-1}function xa(t,n,e){var r=Ox.exec(n.slice(e,e+1));return r?e+r[0].length:-1}function ba(t,n){return ra(t.getDate(),n,2)}function wa(t,n){return ra(t.getHours(),n,2)}function Ma(t,n){return ra(t.getHours()%12||12,n,2)}function Ta(t,n){return ra(1+Xm.count(sx(t),t),n,3)}function Sa(t,n){return ra(t.getMilliseconds(),n,3)}function Na(t,n){return ra(t.getMonth()+1,n,2)}function ka(t,n){return ra(t.getMinutes(),n,2)}function Ea(t,n){return ra(t.getSeconds(),n,2)}function Aa(t,n){return ra($m.count(sx(t),t),n,2)}function Ca(t){return t.getDay()}function za(t,n){return ra(Wm.count(sx(t),t),n,2)}function Pa(t,n){return ra(t.getFullYear()%100,n,2)}function La(t,n){return ra(t.getFullYear()%1e4,n,4)}function Ra(t){var n=t.getTimezoneOffset();return(n>0?"-":(n*=-1,"+"))+ra(n/60|0,"0",2)+ra(n%60,"0",2)}function qa(t,n){return ra(t.getUTCDate(),n,2)}function Ua(t,n){return ra(t.getUTCHours(),n,2)}function Da(t,n){return ra(t.getUTCHours()%12||12,n,2)}function Oa(t,n){return 
ra(1+vx.count(Lx(t),t),n,3)}function Fa(t,n){return ra(t.getUTCMilliseconds(),n,3)}function Ia(t,n){return ra(t.getUTCMonth()+1,n,2)}function Ya(t,n){return ra(t.getUTCMinutes(),n,2)}function Ba(t,n){return ra(t.getUTCSeconds(),n,2)}function ja(t,n){return ra(gx.count(Lx(t),t),n,2)}function Ha(t){return t.getUTCDay()}function Xa(t,n){return ra(yx.count(Lx(t),t),n,2)}function Va(t,n){return ra(t.getUTCFullYear()%100,n,2)}function $a(t,n){return ra(t.getUTCFullYear()%1e4,n,4)}function Wa(){return"+0000"}function Za(){return"%"}function Ga(n){return Rx=ea(n),t.timeFormat=Rx.format,t.timeParse=Rx.parse,t.utcFormat=Rx.utcFormat,t.utcParse=Rx.utcParse,Rx}function Ja(t){return t.toISOString()}function Qa(t){var n=new Date(t);return isNaN(n)?null:n}function Ka(t){return new Date(t)}function tc(t){return t instanceof Date?+t:+new Date(+t)}function nc(t,n,e,r,o,u,a,c,s){function f(i){return(a(i)1?0:t<-1?xb:Math.acos(t)}function oc(t){return t>=1?bb:t<=-1?-bb:Math.asin(t)}function uc(t){return t.innerRadius}function ac(t){return t.outerRadius}function cc(t){return t.startAngle}function sc(t){return t.endAngle}function fc(t){return t&&t.padAngle}function lc(t,n,e,r,i,o,u,a){var c=e-t,s=r-n,f=u-i,l=a-o,h=(f*(n-o)-l*(t-i))/(l*c-f*s);return[t+h*c,n+h*s]}function hc(t,n,e,r,i,o,u){var a=t-e,c=n-r,s=(u?o:-o)/yb(a*a+c*c),f=s*c,l=-s*a,h=t+f,p=n+l,d=e+f,v=r+l,_=(h+d)/2,g=(p+v)/2,y=d-h,m=v-p,x=y*y+m*m,b=i-o,w=h*v-d*p,M=(m<0?-1:1)*yb(vb(0,b*b*x-w*w)),T=(w*m-y*M)/x,S=(-w*y-m*M)/x,N=(w*m+y*M)/x,k=(-w*y+m*M)/x,E=T-_,A=S-g,C=N-_,z=k-g;return E*E+A*A>C*C+z*z&&(T=N,S=k),{cx:T,cy:S,x01:-f,y01:-l,x11:T*(i/b-1),y11:S*(i/b-1)}}function pc(t){this._context=t}function dc(t){return t[0]}function vc(t){return t[1]}function _c(t){this._curve=t}function gc(t){function n(n){return new _c(t(n))}return n._curve=t,n}function yc(t){var n=t.curve;return t.angle=t.x,delete t.x,t.radius=t.y,delete t.y,t.curve=function(t){return arguments.length?n(gc(t)):n()._curve},t}function 
mc(t,n,e){t._context.bezierCurveTo((2*t._x0+t._x1)/3,(2*t._y0+t._y1)/3,(t._x0+2*t._x1)/3,(t._y0+2*t._y1)/3,(t._x0+4*t._x1+n)/6,(t._y0+4*t._y1+e)/6)}function xc(t){this._context=t}function bc(t){this._context=t}function wc(t){this._context=t}function Mc(t,n){this._basis=new xc(t),this._beta=n}function Tc(t,n,e){t._context.bezierCurveTo(t._x1+t._k*(t._x2-t._x0),t._y1+t._k*(t._y2-t._y0),t._x2+t._k*(t._x1-n),t._y2+t._k*(t._y1-e),t._x2,t._y2)}function Sc(t,n){this._context=t,this._k=(1-n)/6}function Nc(t,n){this._context=t,this._k=(1-n)/6}function kc(t,n){this._context=t,this._k=(1-n)/6}function Ec(t,n,e){var r=t._x1,i=t._y1,o=t._x2,u=t._y2;if(t._l01_a>mb){var a=2*t._l01_2a+3*t._l01_a*t._l12_a+t._l12_2a,c=3*t._l01_a*(t._l01_a+t._l12_a);r=(r*a-t._x0*t._l12_2a+t._x2*t._l01_2a)/c,i=(i*a-t._y0*t._l12_2a+t._y2*t._l01_2a)/c}if(t._l23_a>mb){var s=2*t._l23_2a+3*t._l23_a*t._l12_a+t._l12_2a,f=3*t._l23_a*(t._l23_a+t._l12_a);o=(o*s+t._x1*t._l23_2a-n*t._l12_2a)/f,u=(u*s+t._y1*t._l23_2a-e*t._l12_2a)/f}t._context.bezierCurveTo(r,i,o,u,t._x2,t._y2)}function Ac(t,n){this._context=t,this._alpha=n}function Cc(t,n){this._context=t,this._alpha=n}function zc(t,n){this._context=t,this._alpha=n}function Pc(t){this._context=t}function Lc(t){return t<0?-1:1}function Rc(t,n,e){var r=t._x1-t._x0,i=n-t._x1,o=(t._y1-t._y0)/(r||i<0&&-0),u=(e-t._y1)/(i||r<0&&-0),a=(o*i+u*r)/(r+i);return(Lc(o)+Lc(u))*Math.min(Math.abs(o),Math.abs(u),.5*Math.abs(a))||0}function qc(t,n){var e=t._x1-t._x0;return e?(3*(t._y1-t._y0)/e-n)/2:n}function Uc(t,n,e){var r=t._x0,i=t._y0,o=t._x1,u=t._y1,a=(o-r)/3;t._context.bezierCurveTo(r+a,i+a*n,o-a,u-a*e,o,u)}function Dc(t){this._context=t}function Oc(t){this._context=new Fc(t)}function Fc(t){this._context=t}function Ic(t){return new Dc(t)}function Yc(t){return new Oc(t)}function Bc(t){this._context=t}function jc(t){var n,e,r=t.length-1,i=new Array(r),o=new Array(r),u=new 
Array(r);for(i[0]=0,o[0]=2,u[0]=t[0]+2*t[1],n=1;n=0;--n)i[n]=(u[n]-i[n+1])/o[n];for(o[r-1]=(t[r]+i[r-1])/2,n=0;n0)){if(o/=d,d<0){if(o0){if(o>p)return;o>h&&(h=o)}if(o=r-c,d||!(o<0)){if(o/=d,d<0){if(o>p)return;o>h&&(h=o)}else if(d>0){if(o0)){if(o/=v,v<0){if(o0){if(o>p)return;o>h&&(h=o)}if(o=i-s,v||!(o<0)){if(o/=v,v<0){if(o>p)return;o>h&&(h=o)}else if(v>0){if(o0||p<1)||(h>0&&(t[0]=[c+h*d,s+h*v]),p<1&&(t[1]=[c+p*d,s+p*v]),!0)}}}}}function us(t,n,e,r,i){var o=t[1];if(o)return!0;var u,a,c=t[0],s=t.left,f=t.right,l=s[0],h=s[1],p=f[0],d=f[1],v=(l+p)/2,_=(h+d)/2;if(d===h){if(v=r)return;if(l>p){if(c){if(c[1]>=i)return}else c=[v,e];o=[v,i]}else{if(c){if(c[1]1)if(l>p){if(c){if(c[1]>=i)return}else c=[(e-a)/u,e];o=[(i-a)/u,i]}else{if(c){if(c[1]=r)return}else c=[n,u*n+a];o=[r,u*r+a]}else{if(c){if(c[0]zw||Math.abs(i[0][1]-i[1][1])>zw)||delete Ew[o]}function cs(t){return Nw[t.index]={site:t,halfedges:[]}}function ss(t,n){var e=t.site,r=n.left,i=n.right;return e===i&&(i=r,r=e),i?Math.atan2(i[1]-r[1],i[0]-r[0]):(e===r?(r=n[1],i=n[0]):(r=n[0],i=n[1]),Math.atan2(r[0]-i[0],i[1]-r[1]))}function fs(t,n){return n[+(n.left!==t.site)]}function ls(t,n){return n[+(n.left===t.site)]}function hs(){for(var t,n,e,r,i=0,o=Nw.length;izw||Math.abs(v-h)>zw)&&(c.splice(a,0,Ew.push(rs(u,p,Math.abs(d-t)zw?[t,Math.abs(l-t)zw?[Math.abs(h-r)zw?[e,Math.abs(l-e)zw?[Math.abs(h-n)=-Pw)){var p=c*c+s*s,d=f*f+l*l,v=(l*p-s*d)/h,_=(c*d-f*p)/h,g=Aw.pop()||new ds;g.arc=t,g.site=i,g.x=v+u,g.y=(g.cy=_+a)+Math.sqrt(v*v+_*_),t.circle=g;for(var y=null,m=kw._;m;)if(g.yzw)a=a.L;else{if(!((i=o-Ms(a,u))>zw)){r>-zw?(n=a.P,e=a):i>-zw?(n=a,e=a.N):n=e=a;break}if(!a.R){n=a;break}a=a.R}cs(t);var c=ys(t);if(Sw.insert(n,c),n||e){if(n===e)return _s(n),e=ys(n.site),Sw.insert(c,e),c.edge=e.edge=es(n.site,c.site),vs(n),void vs(e);if(!e)return void(c.edge=es(n.site,c.site));_s(n),_s(e);var 
s=n.site,f=s[0],l=s[1],h=t[0]-f,p=t[1]-l,d=e.site,v=d[0]-f,_=d[1]-l,g=2*(h*_-p*v),y=h*h+p*p,m=v*v+_*_,x=[(_*y-p*m)/g+f,(h*m-v*y)/g+l];is(e.edge,s,d,x),c.edge=es(s,t,null,x),e.edge=es(t,d,null,x),vs(n),vs(e)}}function ws(t,n){var e=t.site,r=e[0],i=e[1],o=i-n;if(!o)return r;var u=t.P;if(!u)return-1/0;e=u.site;var a=e[0],c=e[1],s=c-n;if(!s)return a;var f=a-r,l=1/o-1/s,h=f/s;return l?(-h+Math.sqrt(h*h-2*l*(f*f/(-2*s)-c+s/2+i-o/2)))/l+r:(r+a)/2}function Ms(t,n){var e=t.N;if(e)return ws(e,n);var r=t.site;return r[1]===n?r[0]:1/0}function Ts(t,n,e){return(t[0]-e[0])*(n[1]-t[1])-(t[0]-n[0])*(e[1]-t[1])}function Ss(t,n){return n[1]-t[1]||n[0]-t[0]}function Ns(t,n){var e,r,i,o=t.sort(Ss).pop();for(Ew=[],Nw=new Array(t.length),Sw=new Jc,kw=new Jc;;)if(i=Tw,o&&(!i||o[1]n?1:t>=n?0:NaN},qs=function(t){return 1===t.length&&(t=n(t)),{left:function(n,e,r,i){for(null==r&&(r=0),null==i&&(i=n.length);r>>1;t(n[o],e)<0?r=o+1:i=o}return r},right:function(n,e,r,i){for(null==r&&(r=0),null==i&&(i=n.length);r>>1;t(n[o],e)>0?i=o:r=o+1}return r}}},Us=qs(Rs),Ds=Us.right,Os=Us.left,Fs=function(t,n){null==n&&(n=e);for(var r=0,i=t.length-1,o=t[0],u=new Array(i<0?0:i);rt?1:n>=t?0:NaN},Bs=function(t){return null===t?NaN:+t},js=function(t,n){var e,r,i=t.length,o=0,u=-1,a=0,c=0;if(null==n)for(;++u1)return c/(o-1)},Hs=function(t,n){var e=js(t,n);return e?Math.sqrt(e):e},Xs=function(t,n){var e,r,i,o=t.length,u=-1;if(null==n){for(;++u=e)for(r=i=e;++ue&&(r=e),i=e)for(r=i=e;++ue&&(r=e),i0)for(t=Math.ceil(t/u),n=Math.floor(n/u),o=new Array(i=Math.ceil(n-t+1));++cl;)h.pop(),--p;var d,v=new Array(p+1);for(o=0;o<=p;++o)d=v[o]=[],d.x0=o>0?h[o-1]:f,d.x1=o=1)return+e(t[r-1],r-1,t);var r,i=(r-1)*n,o=Math.floor(i),u=+e(t[o],o,t);return u+(+e(t[o+1],o+1,t)-u)*(i-o)}},uf=function(t,n,e){return t=Ws.call(t,Bs).sort(Rs),Math.ceil((e-n)/(2*(of(t,.75)-of(t,.25))*Math.pow(t.length,-1/3)))},af=function(t,n,e){return Math.ceil((e-n)/(3.5*Hs(t)*Math.pow(t.length,-1/3)))},cf=function(t,n){var 
e,r,i=t.length,o=-1;if(null==n){for(;++o=e)for(r=e;++or&&(r=e)}else for(;++o=e)for(r=e;++or&&(r=e);return r},sf=function(t,n){var e,r=t.length,i=r,o=-1,u=0;if(null==n)for(;++o=0;)for(r=t[i],n=r.length;--n>=0;)e[--u]=r[n];return e},hf=function(t,n){var e,r,i=t.length,o=-1;if(null==n){for(;++o=e)for(r=e;++oe&&(r=e)}else for(;++o=e)for(r=e;++oe&&(r=e);return r},pf=function(t,n){for(var e=n.length,r=new Array(e);e--;)r[e]=t[n[e]];return r},df=function(t,n){if(e=t.length){var e,r,i=0,o=0,u=t[o];for(null==n&&(n=Rs);++i0)for(var e,r,i=new Array(e),o=0;o=0&&"xmlns"!==(n=t.slice(0,e))&&(t=t.slice(e+1)),Ef.hasOwnProperty(n)?{space:Ef[n],local:t}:t},Cf=function(t){var n=Af(t);return(n.local?b:x)(n)},zf=0;M.prototype=w.prototype={constructor:M,get:function(t){for(var n=this._;!(n in t);)if(!(t=t.parentNode))return;return t[n]},set:function(t,n){return t[this._]=n},remove:function(t){return this._ in t&&delete t[this._]},toString:function(){return this._}};var Pf=function(t){return function(){return this.matches(t)}};if("undefined"!=typeof document){var Lf=document.documentElement;if(!Lf.matches){var Rf=Lf.webkitMatchesSelector||Lf.msMatchesSelector||Lf.mozMatchesSelector||Lf.oMatchesSelector;Pf=function(t){return function(){return Rf.call(this,t)}}}}var qf=Pf,Uf={};if(t.event=null,"undefined"!=typeof document){"onmouseenter"in document.documentElement||(Uf={mouseenter:"mouseover",mouseleave:"mouseout"})}var Df=function(t,n,e){var r,i,o=N(t+""),u=o.length;{if(!(arguments.length<2)){for(a=n?E:k,null==e&&(e=!1),r=0;r=x&&(x=m+1);!(y=_[x])&&++x=0;)(r=i[o])&&(u&&u!==r.nextSibling&&u.parentNode.insertBefore(r,u),u=r);return this},tl=function(t){function n(n,e){return n&&e?t(n.__data__,e.__data__):!n-!e}t||(t=q);for(var e=this._groups,r=e.length,i=new Array(r),o=0;o1?this.each((null==n?B:"function"==typeof n?H:j)(t,n,null==e?"":e)):cl(r=this.node()).getComputedStyle(r,null).getPropertyValue(t)},fl=function(t,n){return arguments.length>1?this.each((null==n?X:"function"==typeof 
n?$:V)(t,n)):this.node()[t]};G.prototype={add:function(t){this._names.indexOf(t)<0&&(this._names.push(t),this._node.setAttribute("class",this._names.join(" ")))},remove:function(t){var n=this._names.indexOf(t);n>=0&&(this._names.splice(n,1),this._node.setAttribute("class",this._names.join(" ")))},contains:function(t){return this._names.indexOf(t)>=0}};var ll=function(t,n){var e=W(t+"");if(arguments.length<2){for(var r=Z(this.node()),i=-1,o=e.length;++i=240?t-240:t+120,i,r),qt(t,i,r),qt(t<120?t+240:t-120,i,r),this.opacity)},displayable:function(){return(0<=this.s&&this.s<=1||isNaN(this.s))&&0<=this.l&&this.l<=1&&0<=this.opacity&&this.opacity<=1}}));var jl=Math.PI/180,Hl=180/Math.PI,Xl=.95047,Vl=1,$l=1.08883,Wl=4/29,Zl=6/29,Gl=3*Zl*Zl,Jl=Zl*Zl*Zl;Cl(Ot,Dt,Mt(Tt,{brighter:function(t){return new Ot(this.l+18*(null==t?1:t),this.a,this.b,this.opacity)},darker:function(t){return new Ot(this.l-18*(null==t?1:t),this.a,this.b,this.opacity)},rgb:function(){var t=(this.l+16)/116,n=isNaN(this.a)?t:t+this.a/500,e=isNaN(this.b)?t:t-this.b/200;return t=Vl*It(t),n=Xl*It(n),e=$l*It(e),new Ct(Yt(3.2404542*n-1.5371385*t-.4985314*e),Yt(-.969266*n+1.8760108*t+.041556*e),Yt(.0556434*n-.2040259*t+1.0572252*e),this.opacity)}})),Cl(Xt,Ht,Mt(Tt,{brighter:function(t){return new Xt(this.h,this.c,this.l+18*(null==t?1:t),this.opacity)},darker:function(t){return new Xt(this.h,this.c,this.l-18*(null==t?1:t),this.opacity)},rgb:function(){return Ut(this).rgb()}}));var Ql=-.14861,Kl=1.78277,th=-.29227,nh=-.90649,eh=1.97294,rh=eh*nh,ih=eh*Kl,oh=Kl*th-nh*Ql;Cl(Wt,$t,Mt(Tt,{brighter:function(t){return t=null==t?1/.7:Math.pow(1/.7,t),new Wt(this.h,this.s,this.l*t,this.opacity)},darker:function(t){return t=null==t?.7:Math.pow(.7,t),new Wt(this.h,this.s,this.l*t,this.opacity)},rgb:function(){var t=isNaN(this.h)?0:(this.h+120)*jl,n=+this.l,e=isNaN(this.s)?0:this.s*n*(1-n),r=Math.cos(t),i=Math.sin(t);return new Ct(255*(n+e*(Ql*r+Kl*i)),255*(n+e*(th*r+nh*i)),255*(n+e*(eh*r)),this.opacity)}}));var 
uh,ah,ch,sh,fh,lh,hh=function(t){var n=t.length-1;return function(e){var r=e<=0?e=0:e>=1?(e=1,n-1):Math.floor(e*n),i=t[r],o=t[r+1],u=r>0?t[r-1]:2*i-o,a=ro&&(i=n.slice(o,i),a[u]?a[u]+=i:a[++u]=i),(e=e[0])===(r=r[0])?a[u]?a[u]+=r:a[++u]=r:(a[++u]=null,c.push({i:u,x:xh(e,r)})),o=Mh.lastIndex;return oep&&e.statenp&&e.name===n)return new te([[t]],nd,n,+r)}return null},rd=function(t){return function(){return t}},id=function(t,n,e){this.target=t,this.type=n,this.selection=e},od=function(){t.event.preventDefault(),t.event.stopImmediatePropagation()},ud={name:"drag"},ad={name:"space"},cd={name:"handle"},sd={name:"center"},fd={name:"x",handles:["e","w"].map(Te),input:function(t,n){return t&&[[t[0],n[0][1]],[t[1],n[1][1]]]},output:function(t){return t&&[t[0][0],t[1][0]]}},ld={name:"y",handles:["n","s"].map(Te),input:function(t,n){return t&&[[n[0][0],t[0]],[n[1][0],t[1]]]},output:function(t){return t&&[t[0][1],t[1][1]]}},hd={name:"xy",handles:["n","e","s","w","nw","ne","se","sw"].map(Te),input:function(t){return t},output:function(t){return t}},pd={overlay:"crosshair",selection:"move",n:"ns-resize",e:"ew-resize",s:"ns-resize",w:"ew-resize",nw:"nwse-resize",ne:"nesw-resize",se:"nwse-resize",sw:"nesw-resize"},dd={e:"w",w:"e",nw:"ne",ne:"nw",se:"sw",sw:"se"},vd={n:"s",s:"n",nw:"sw",ne:"se",se:"ne",sw:"nw"},_d={overlay:1,selection:1,n:null,e:1,s:null,w:-1,nw:-1,ne:1,se:1,sw:-1},gd={overlay:1,selection:1,n:-1,e:null,s:1,w:null,nw:-1,ne:-1,se:1,sw:1},yd=function(){return Pe(hd)},md=Math.cos,xd=Math.sin,bd=Math.PI,wd=bd/2,Md=2*bd,Td=Math.max,Sd=function(){function t(t){var o,u,a,c,s,f,l=t.length,h=[],p=Js(l),d=[],v=[],_=v.groups=new Array(l),g=new Array(l*l);for(o=0,s=-1;++s1e-6)if(Math.abs(f*a-c*s)>1e-6&&i){var h=e-o,p=r-u,d=a*a+c*c,v=h*h+p*p,_=Math.sqrt(d),g=Math.sqrt(l),y=i*Math.tan((Ed-Math.acos((d+l-v)/(2*_*g)))/2),m=y/g,x=y/_;Math.abs(m-1)>1e-6&&(this._+="L"+(t+m*s)+","+(n+m*f)),this._+="A"+i+","+i+",0,0,"+ +(f*h>s*p)+","+(this._x1=t+x*a)+","+(this._y1=n+x*c)}else 
this._+="L"+(this._x1=t)+","+(this._y1=n);else;},arc:function(t,n,e,r,i,o){t=+t,n=+n,e=+e;var u=e*Math.cos(r),a=e*Math.sin(r),c=t+u,s=n+a,f=1^o,l=o?r-i:i-r;if(e<0)throw new Error("negative radius: "+e);null===this._x1?this._+="M"+c+","+s:(Math.abs(this._x1-c)>1e-6||Math.abs(this._y1-s)>1e-6)&&(this._+="L"+c+","+s),e&&(l<0&&(l=l%Ad+Ad),l>Cd?this._+="A"+e+","+e+",0,1,"+f+","+(t-u)+","+(n-a)+"A"+e+","+e+",0,1,"+f+","+(this._x1=c)+","+(this._y1=s):l>1e-6&&(this._+="A"+e+","+e+",0,"+ +(l>=Ed)+","+f+","+(this._x1=t+e*Math.cos(i))+","+(this._y1=n+e*Math.sin(i))))},rect:function(t,n,e,r){this._+="M"+(this._x0=this._x1=+t)+","+(this._y0=this._y1=+n)+"h"+ +e+"v"+ +r+"h"+-e+"Z"},toString:function(){return this._}};var zd=function(){function t(){var t,a=Nd.call(arguments),c=n.apply(this,a),s=e.apply(this,a),f=+r.apply(this,(a[0]=c,a)),l=i.apply(this,a)-wd,h=o.apply(this,a)-wd,p=f*md(l),d=f*xd(l),v=+r.apply(this,(a[0]=s,a)),_=i.apply(this,a)-wd,g=o.apply(this,a)-wd;if(u||(u=t=qe()),u.moveTo(p,d),u.arc(0,0,f,l,h),l===_&&h===g||(u.quadraticCurveTo(0,0,v*md(_),v*xd(_)),u.arc(0,0,v,_,g)),u.quadraticCurveTo(0,0,p,d),u.closePath(),t)return u=null,t+""||null}var n=Ue,e=De,r=Oe,i=Fe,o=Ie,u=null;return t.radius=function(n){return arguments.length?(r="function"==typeof n?n:kd(+n),t):r},t.startAngle=function(n){return arguments.length?(i="function"==typeof n?n:kd(+n),t):i},t.endAngle=function(n){return arguments.length?(o="function"==typeof n?n:kd(+n),t):o},t.source=function(e){return arguments.length?(n=e,t):n},t.target=function(n){return arguments.length?(e=n,t):e},t.context=function(n){return arguments.length?(u=null==n?null:n,t):u},t};Ye.prototype=Be.prototype={constructor:Ye,has:function(t){return"$"+t in this},get:function(t){return this["$"+t]},set:function(t,n){return this["$"+t]=n,this},remove:function(t){var n="$"+t;return n in this&&delete this[n]},clear:function(){for(var t in this)"$"===t[0]&&delete this[t]},keys:function(){var t=[];for(var n in 
this)"$"===n[0]&&t.push(n.slice(1));return t},values:function(){var t=[];for(var n in this)"$"===n[0]&&t.push(this[n]);return t},entries:function(){var t=[];for(var n in this)"$"===n[0]&&t.push({key:n.slice(1),value:this[n]});return t},size:function(){var t=0;for(var n in this)"$"===n[0]&&++t;return t},empty:function(){for(var t in this)if("$"===t[0])return!1;return!0},each:function(t){for(var n in this)"$"===n[0]&&t(this[n],n.slice(1),this)}};var Pd=function(){function t(n,i,u,a){if(i>=o.length)return null!=r?r(n):null!=e?n.sort(e):n;for(var c,s,f,l=-1,h=n.length,p=o[i++],d=Be(),v=u();++lo.length)return t;var i,a=u[e-1];return null!=r&&e>=o.length?i=t.entries():(i=[],t.each(function(t,r){i.push({key:r,values:n(t,e)})})),null!=a?i.sort(function(t,n){return a(t.key,n.key)}):i}var e,r,i,o=[],u=[];return i={object:function(n){return t(n,0,je,He)},map:function(n){return t(n,0,Xe,Ve)},entries:function(e){return n(t(e,0,Xe,Ve),0)},key:function(t){return o.push(t),i},sortKeys:function(t){return u[o.length-1]=t,i},sortValues:function(t){return e=t,i},rollup:function(t){return r=t,i}}},Ld=Be.prototype;$e.prototype=We.prototype={constructor:$e,has:Ld.has,add:function(t){return t+="",this["$"+t]=t,this},remove:Ld.remove,clear:Ld.clear,values:Ld.keys,size:Ld.size,empty:Ld.empty,each:Ld.each};var Rd=function(t){var n=[];for(var e in t)n.push(e);return n},qd=function(t){var n=[];for(var e in t)n.push(t[e]);return n},Ud=function(t){var n=[];for(var e in t)n.push({key:e,value:t[e]});return n},Dd=function(t){function n(t,n){var r,i,o=e(t,function(t,e){if(r)return r(t,e-1);i=t,r=n?Ge(t,n):Ze(t)});return o.columns=i,o}function e(t,n){function e(){if(f>=s)return u;if(i)return i=!1,o;var n,e=f;if(34===t.charCodeAt(e)){for(var r=e;r++t||t>i||r>n||n>o))return this;var u,a,c=i-e,s=this._root;switch(a=(n<(r+o)/2)<<1|t<(e+i)/2){case 0:do{u=new Array(4),u[a]=s,s=u}while(c*=2,i=e+c,o=r+c,t>i||n>o);break;case 1:do{u=new Array(4),u[a]=s,s=u}while(c*=2,e=i-c,o=r+c,e>t||n>o);break;case 2:do{u=new 
Array(4),u[a]=s,s=u}while(c*=2,i=e+c,r=o-c,t>i||r>n);break;case 3:do{u=new Array(4),u[a]=s,s=u}while(c*=2,e=i-c,r=o-c,e>t||r>n)}this._root&&this._root.length&&(this._root=s)}return this._x0=e,this._y0=r,this._x1=i,this._y1=o,this},Kd=function(){var t=[];return this.visit(function(n){if(!n.length)do{t.push(n.data)}while(n=n.next)}),t},tv=function(t){return arguments.length?this.cover(+t[0][0],+t[0][1]).cover(+t[1][0],+t[1][1]):isNaN(this._x0)?void 0:[[this._x0,this._y0],[this._x1,this._y1]]},nv=function(t,n,e,r,i){this.node=t,this.x0=n,this.y0=e,this.x1=r,this.y1=i},ev=function(t,n,e){var r,i,o,u,a,c,s,f=this._x0,l=this._y0,h=this._x1,p=this._y1,d=[],v=this._root;for(v&&d.push(new nv(v,f,l,h,p)),null==e?e=1/0:(f=t-e,l=n-e,h=t+e,p=n+e,e*=e);c=d.pop();)if(!(!(v=c.node)||(i=c.x0)>h||(o=c.y0)>p||(u=c.x1)=g)<<1|t>=_)&&(c=d[d.length-1],d[d.length-1]=d[d.length-1-s],d[d.length-1-s]=c)}else{var y=t-+this._x.call(null,v.data),m=n-+this._y.call(null,v.data),x=y*y+m*m;if(x=(a=(d+_)/2))?d=a:_=a,(f=u>=(c=(v+g)/2))?v=c:g=c,n=p,!(p=p[l=f<<1|s]))return this;if(!p.length)break;(n[l+1&3]||n[l+2&3]||n[l+3&3])&&(e=n,h=l)}for(;p.data!==t;)if(r=p,!(p=p.next))return this;return(i=p.next)&&delete p.next,r?(i?r.next=i:delete r.next,this):n?(i?n[l]=i:delete n[l],(p=n[0]||n[1]||n[2]||n[3])&&p===(n[3]||n[2]||n[1]||n[0])&&!p.length&&(e?e[h]=p:this._root=p),this):(this._root=i,this)},iv=function(){return this._root},ov=function(){var t=0;return this.visit(function(n){if(!n.length)do{++t}while(n=n.next)}),t},uv=function(t){var n,e,r,i,o,u,a=[],c=this._root;for(c&&a.push(new nv(c,this._x0,this._y0,this._x1,this._y1));n=a.pop();)if(!t(c=n.node,r=n.x0,i=n.y0,o=n.x1,u=n.y1)&&c.length){var s=(r+o)/2,f=(i+u)/2;(e=c[3])&&a.push(new nv(e,s,f,o,u)),(e=c[2])&&a.push(new nv(e,r,f,s,u)),(e=c[1])&&a.push(new nv(e,s,i,o,f)),(e=c[0])&&a.push(new nv(e,r,i,s,f))}return this},av=function(t){var n,e=[],r=[];for(this._root&&e.push(new nv(this._root,this._x0,this._y0,this._x1,this._y1));n=e.pop();){var 
i=n.node;if(i.length){var o,u=n.x0,a=n.y0,c=n.x1,s=n.y1,f=(u+c)/2,l=(a+s)/2;(o=i[0])&&e.push(new nv(o,u,a,f,l)),(o=i[1])&&e.push(new nv(o,f,a,c,l)),(o=i[2])&&e.push(new nv(o,u,l,f,s)),(o=i[3])&&e.push(new nv(o,f,l,c,s))}r.push(n)}for(;n=r.pop();)t(n.node,n.x0,n.y0,n.x1,n.y1);return this},cv=function(t){return arguments.length?(this._x=t,this):this._x},sv=function(t){return arguments.length?(this._y=t,this):this._y},fv=rr.prototype=ir.prototype;fv.copy=function(){var t,n,e=new ir(this._x,this._y,this._x0,this._y0,this._x1,this._y1),r=this._root;if(!r)return e;if(!r.length)return e._root=or(r),e;for(t=[{source:r,target:e._root=new Array(4)}];r=t.pop();)for(var i=0;i<4;++i)(n=r.source[i])&&(n.length?t.push({source:n,target:r.target[i]=new Array(4)}):r.target[i]=or(n));return e},fv.add=Jd,fv.addAll=Ke,fv.cover=Qd,fv.data=Kd,fv.extent=tv,fv.find=ev,fv.remove=rv,fv.removeAll=tr,fv.root=iv,fv.size=ov,fv.visit=uv,fv.visitAfter=av,fv.x=cv,fv.y=sv;var lv,hv=function(t){function n(){function t(t,n,e,r,i){var o=t.data,a=t.r,p=l+a;{if(!o)return n>s+p||rf+p||ic.index){var d=s-o.x-o.vx,v=f-o.y-o.vy,_=d*d+v*v;_t.r&&(t.r=t[n].r)}function r(){if(i){var n,e,r=i.length;for(o=new Array(r),n=0;n1?(null==n?l.remove(t):l.set(t,i(n)),o):l.get(t)},find:function(n,e,r){var i,o,u,a,c,s=0,f=t.length;for(null==r?r=1/0:r*=r,s=0;s1?(p.on(t,n),o):p.on(t)}}},gv=function(){function t(t){var n,a=i.length,c=rr(i,fr,lr).visitAfter(e);for(u=t,n=0;n=f)){(t.data!==o||t.next)&&(0===i&&(i=Gd(),p+=i*i),0===c&&(c=Gd(),p+=c*c),p1?r[0]+r.slice(2):r,+t.slice(e+1)]},bv=function(t){return t=xv(Math.abs(t)),t?t[1]:NaN},wv=function(t,n){return function(e,r){for(var i=e.length,o=[],u=0,a=t[0],c=0;i>0&&a>0&&(c+a+1>r&&(a=Math.max(1,r-c)),o.push(e.substring(i-=a,i+a)),!((c+=a+1)>r));)a=t[u=(u+1)%t.length];return o.reverse().join(n)}},Mv=function(t){return function(n){return n.replace(/[0-9]/g,function(n){return t[+n]})}},Tv=function(t,n){t=t.toPrecision(n);t:for(var e,r=t.length,i=1,o=-1;i0&&(o=0)}return 
o>0?t.slice(0,o)+t.slice(e+1):t},Sv=function(t,n){var e=xv(t,n);if(!e)return t+"";var r=e[0],i=e[1],o=i-(lv=3*Math.max(-8,Math.min(8,Math.floor(i/3))))+1,u=r.length;return o===u?r:o>u?r+new Array(o-u+1).join("0"):o>0?r.slice(0,o)+"."+r.slice(o):"0."+new Array(1-o).join("0")+xv(t,Math.max(0,n+o-1))[0]},Nv=function(t,n){var e=xv(t,n);if(!e)return t+"";var r=e[0],i=e[1];return i<0?"0."+new Array(-i).join("0")+r:r.length>i+1?r.slice(0,i+1)+"."+r.slice(i+1):r+new Array(i-r.length+2).join("0")},kv={"":Tv,"%":function(t,n){return(100*t).toFixed(n)},b:function(t){return Math.round(t).toString(2)},c:function(t){return t+""},d:function(t){return Math.round(t).toString(10)},e:function(t,n){return t.toExponential(n)},f:function(t,n){return t.toFixed(n)},g:function(t,n){return t.toPrecision(n)},o:function(t){return Math.round(t).toString(8)},p:function(t,n){return Nv(100*t,n)},r:Nv,s:Sv,X:function(t){return Math.round(t).toString(16).toUpperCase()},x:function(t){return Math.round(t).toString(16)}},Ev=/^(?:(.)?([<>=^]))?([+\-\( ])?([$#])?(0)?(\d+)?(,)?(\.\d+)?([a-z%])?$/i;hr.prototype=pr.prototype,pr.prototype.toString=function(){return this.fill+this.align+this.sign+this.symbol+(this.zero?"0":"")+(null==this.width?"":Math.max(1,0|this.width))+(this.comma?",":"")+(null==this.precision?"":"."+Math.max(0,0|this.precision))+this.type};var Av,Cv=function(t){return t},zv=["y","z","a","f","p","n","µ","m","","k","M","G","T","P","E","Z","Y"],Pv=function(t){function n(t){function n(t){var n,i,a,f=_,x=g;if("c"===v)x=y(t)+x,t="";else{t=+t;var b=t<0;if(t=y(Math.abs(t),d),b&&0==+t&&(b=!1),f=(b?"("===s?s:"-":"-"===s||"("===s?"":s)+f,x=x+("s"===v?zv[8+lv/3]:"")+(b&&"("===s?")":""),m)for(n=-1,i=t.length;++n(a=t.charCodeAt(n))||a>57){x=(46===a?o+t.slice(n+1):t.slice(n))+x,t=t.slice(0,n);break}}p&&!l&&(t=r(t,1/0));var w=f.length+t.length+x.length,M=w>1)+f+t+x+M.slice(w);break;default:t=M+f+t+x}return u(t)}t=hr(t);var 
e=t.fill,c=t.align,s=t.sign,f=t.symbol,l=t.zero,h=t.width,p=t.comma,d=t.precision,v=t.type,_="$"===f?i[0]:"#"===f&&/[boxX]/.test(v)?"0"+v.toLowerCase():"",g="$"===f?i[1]:/[%p]/.test(v)?a:"",y=kv[v],m=!v||/[defgprs%]/.test(v);return d=null==d?v?6:12:/[gprs]/.test(v)?Math.max(1,Math.min(21,d)):Math.max(0,Math.min(20,d)),n.toString=function(){return t+""},n}function e(t,e){var r=n((t=hr(t),t.type="f",t)),i=3*Math.max(-8,Math.min(8,Math.floor(bv(e)/3))),o=Math.pow(10,-i),u=zv[8+i/3];return function(t){return r(o*t)+u}}var r=t.grouping&&t.thousands?wv(t.grouping,t.thousands):Cv,i=t.currency,o=t.decimal,u=t.numerals?Mv(t.numerals):Cv,a=t.percent||"%";return{format:n,formatPrefix:e}};dr({decimal:".",thousands:",",grouping:[3],currency:["$",""]});var Lv=function(t){return Math.max(0,-bv(Math.abs(t)))},Rv=function(t,n){return Math.max(0,3*Math.max(-8,Math.min(8,Math.floor(bv(n)/3)))-bv(Math.abs(t)))},qv=function(t,n){return t=Math.abs(t),n=Math.abs(n)-t,Math.max(0,bv(n)-bv(t))+1},Uv=function(){return new vr};vr.prototype={constructor:vr,reset:function(){this.s=this.t=0},add:function(t){_r(d_,t,this.t),_r(this,d_.s,this.s),this.s?this.t+=d_.t:this.s=d_.t},valueOf:function(){return this.s}};var Dv,Ov,Fv,Iv,Yv,Bv,jv,Hv,Xv,Vv,$v,Wv,Zv,Gv,Jv,Qv,Kv,t_,n_,e_,r_,i_,o_,u_,a_,c_,s_,f_,l_,h_,p_,d_=new vr,v_=1e-6,__=Math.PI,g_=__/2,y_=__/4,m_=2*__,x_=180/__,b_=__/180,w_=Math.abs,M_=Math.atan,T_=Math.atan2,S_=Math.cos,N_=Math.ceil,k_=Math.exp,E_=Math.log,A_=Math.pow,C_=Math.sin,z_=Math.sign||function(t){return t>0?1:t<0?-1:0},P_=Math.sqrt,L_=Math.tan,R_={Feature:function(t,n){br(t.geometry,n)},FeatureCollection:function(t,n){for(var e=t.features,r=-1,i=e.length;++rv_?Xv=90:Y_<-v_&&(jv=-90),Jv[0]=Bv,Jv[1]=Hv}},j_=function(t){var 
n,e,r,i,o,u,a;if(Xv=Hv=-(Bv=jv=1/0),Gv=[],U_(t,B_),e=Gv.length){for(Gv.sort(jr),n=1,r=Gv[0],o=[r];nBr(r[0],r[1])&&(r[1]=i[1]),Br(i[0],r[1])>Br(r[0],r[1])&&(r[0]=i[0])):o.push(r=i);for(u=-1/0,e=o.length-1,n=0,r=o[e];n<=e;r=i,++n)i=o[n],(a=Br(r[1],i[0]))>u&&(u=a,Bv=i[0],Hv=r[1])}return Gv=Jv=null,Bv===1/0||jv===1/0?[[NaN,NaN],[NaN,NaN]]:[[Bv,jv],[Hv,Xv]]},H_={sphere:xr,point:Xr,lineStart:$r,lineEnd:Gr,polygonStart:function(){H_.lineStart=Jr,H_.lineEnd=Qr},polygonEnd:function(){H_.lineStart=$r,H_.lineEnd=Gr}},X_=function(t){Qv=Kv=t_=n_=e_=r_=i_=o_=u_=a_=c_=0,U_(t,H_);var n=u_,e=a_,r=c_,i=n*n+e*e+r*r;return i<1e-12&&(n=r_,e=i_,r=o_,Kv2?t[2]*b_:0),n.invert=function(n){return n=t.invert(n[0]*b_,n[1]*b_),n[0]*=x_,n[1]*=x_,n},n},ug=function(){function t(t,n){e.push(t=r(t,n)),t[0]*=x_,t[1]*=x_}function n(){var t=i.apply(this,arguments),n=o.apply(this,arguments)*b_,c=u.apply(this,arguments)*b_;return e=[],r=ei(-t[0]*b_,-t[1]*b_,0).invert,ui(a,n,c,1),t={type:"Polygon",coordinates:[e]},e=r=null,t}var e,r,i=V_([0,0]),o=V_(90),u=V_(6),a={point:t};return n.center=function(t){return arguments.length?(i="function"==typeof t?t:V_([+t[0],+t[1]]),n):i},n.radius=function(t){return arguments.length?(o="function"==typeof t?t:V_(+t),n):o},n.precision=function(t){return arguments.length?(u="function"==typeof t?t:V_(+t),n):u},n},ag=function(){var t,n=[];return{point:function(n,e){t.push([n,e])},lineStart:function(){n.push(t=[])},lineEnd:xr,rejoin:function(){n.length>1&&n.push(n.pop().concat(n.shift()))},result:function(){var e=n;return n=[],t=null,e}}},cg=function(t,n,e,r,i,o){var u,a=t[0],c=t[1],s=n[0],f=n[1],l=0,h=1,p=s-a,d=f-c;if(u=e-a,p||!(u>0)){if(u/=p,p<0){if(u0){if(u>h)return;u>l&&(l=u)}if(u=i-a,p||!(u<0)){if(u/=p,p<0){if(u>h)return;u>l&&(l=u)}else if(p>0){if(u0)){if(u/=d,d<0){if(u0){if(u>h)return;u>l&&(l=u)}if(u=o-c,d||!(u<0)){if(u/=d,d<0){if(u>h)return;u>l&&(l=u)}else if(d>0){if(u0&&(t[0]=a+l*p,t[1]=c+l*d),h<1&&(n[0]=a+h*p,n[1]=c+h*d),!0}}}}},sg=function(t,n){return 
w_(t[0]-n[0])=0;--o)i.point((f=s[o])[0],f[1]);else r(h.x,h.p.x,-1,i);h=h.p}h=h.o,s=h.z,p=!p}while(!h.v);i.lineEnd()}}},lg=1e9,hg=-lg,pg=function(){var t,n,e,r=0,i=0,o=960,u=500;return e={stream:function(e){return t&&n===e?t:t=fi(r,i,o,u)(n=e)},extent:function(a){return arguments.length?(r=+a[0][0],i=+a[0][1],o=+a[1][0],u=+a[1][1],t=n=null,e):[[r,i],[o,u]]}}},dg=Uv(),vg=function(t,n){var e=n[0],r=n[1],i=[C_(e),-S_(e),0],o=0,u=0;dg.reset();for(var a=0,c=t.length;a=0?1:-1,T=M*w,S=T>__,N=d*x;if(dg.add(T_(N*M*C_(T),v*b+N*S_(T))),o+=S?w+M*m_:w,S^h>=e^y>=e){var k=zr(Ar(l),Ar(g));Rr(k);var E=zr(i,k);Rr(E);var A=(S^w>=0?-1:1)*yr(E[2]);(r>A||r===A&&(k[0]||k[1]))&&(u+=S^w>=0?1:-1)}}return(o<-v_||o0){for(x||(o.polygonStart(),x=!0),o.lineStart(),t=0;t1&&2&i&&u.push(u.pop().concat(u.shift())),d.push(u.filter(Hi))}var p,d,v,_=n(o),g=i.invert(r[0],r[1]),y=ag(),m=n(y),x=!1,b={point:u,lineStart:c,lineEnd:s,polygonStart:function(){b.point=f,b.lineStart=l,b.lineEnd=h,d=[],p=[]},polygonEnd:function(){b.point=u,b.lineStart=c,b.lineEnd=s,d=lf(d);var t=vg(p,g);d.length?(x||(o.polygonStart(),x=!0),fg(d,Xi,t,e,o)):t&&(x||(o.polygonStart(),x=!0),o.lineStart(),e(null,null,1,o),o.lineEnd()),x&&(o.polygonEnd(),x=!1),d=p=null},sphere:function(){o.polygonStart(),o.lineStart(),e(null,null,1,o),o.lineEnd(),o.polygonEnd()}};return b}},ty=Kg(function(){return!0},Vi,Wi,[-__,-g_]),ny=function(t,n){function e(e,r,i,o){ui(o,t,n,i,e,r)}function r(t,n){return S_(t)*S_(n)>a}function i(t){var n,e,i,a,f;return{lineStart:function(){a=i=!1,f=1},point:function(l,h){var p,d=[l,h],v=r(l,h),_=c?v?0:u(l,h):v?u(l+(l<0?__:-__),h):0;if(!n&&(a=i=v)&&t.lineStart(),v!==i&&(p=o(n,d),(sg(n,p)||sg(d,p))&&(d[0]+=v_,d[1]+=v_,v=r(d[0],d[1]))),v!==i)f=0,v?(t.lineStart(),p=o(d,n),t.point(p[0],p[1])):(p=o(n,d),t.point(p[0],p[1]),t.lineEnd()),n=p;else if(s&&n&&c^v){var 
g;_&e||!(g=o(d,n,!0))||(f=0,c?(t.lineStart(),t.point(g[0][0],g[0][1]),t.point(g[1][0],g[1][1]),t.lineEnd()):(t.point(g[1][0],g[1][1]),t.lineEnd(),t.lineStart(),t.point(g[0][0],g[0][1])))}!v||n&&sg(n,d)||t.point(d[0],d[1]),n=d,i=v,e=_},lineEnd:function(){i&&t.lineEnd(),n=null},clean:function(){return f|(a&&i)<<1}}}function o(t,n,e){var r=Ar(t),i=Ar(n),o=[1,0,0],u=zr(r,i),c=Cr(u,u),s=u[0],f=c-s*s;if(!f)return!e&&t;var l=a*c/f,h=-a*s/f,p=zr(o,u),d=Lr(o,l);Pr(d,Lr(u,h));var v=p,_=Cr(d,v),g=Cr(v,v),y=_*_-g*(Cr(d,d)-1);if(!(y<0)){var m=P_(y),x=Lr(v,(-_-m)/g);if(Pr(x,d),x=Er(x),!e)return x;var b,w=t[0],M=n[0],T=t[1],S=n[1];M0^x[1]<(w_(x[0]-w)__^(w<=x[0]&&x[0]<=M)){var A=Lr(v,(-_+m)/g);return Pr(A,d),[x,Er(A)]}}}function u(n,e){var r=c?t:__-t,i=0;return n<-r?i|=1:n>r&&(i|=2),e<-r?i|=4:e>r&&(i|=8),i}var a=S_(t),c=a>0,s=w_(a)>v_;return Kg(r,i,e,c?[0,-t]:[-__,t-__])},ey=function(t){return{stream:Zi(t)}};Gi.prototype={constructor:Gi,point:function(t,n){this.stream.point(t,n)},sphere:function(){this.stream.sphere()},lineStart:function(){this.stream.lineStart()},lineEnd:function(){this.stream.lineEnd()},polygonStart:function(){this.stream.polygonStart()},polygonEnd:function(){this.stream.polygonEnd()}};var ry=16,iy=S_(30*b_),oy=function(t,n){return+n?to(t,n):Ki(t)},uy=Zi({point:function(t,n){this.stream.point(t*b_,n*b_)}}),ay=function(){return ro(oo).scale(155.424).center([0,33.6442])},cy=function(){return ay().parallels([29.5,45.5]).scale(1070).translate([480,250]).rotate([96,0]).center([-.6,38.7])},sy=function(){function t(t){var n=t[0],e=t[1];return a=null,i.point(n,e),a||(o.point(n,e),a)||(u.point(n,e),a)}function n(){return e=r=null,t}var e,r,i,o,u,a,c=cy(),s=ay().rotate([154,0]).center([-2,58.5]).parallels([55,65]),f=ay().rotate([157,0]).center([-3,19.9]).parallels([8,18]),l={point:function(t,n){a=[t,n]}};return t.invert=function(t){var 
n=c.scale(),e=c.translate(),r=(t[0]-e[0])/n,i=(t[1]-e[1])/n;return(i>=.12&&i<.234&&r>=-.425&&r<-.214?s:i>=.166&&i<.234&&r>=-.214&&r<-.115?f:c).invert(t)},t.stream=function(t){return e&&r===t?e:e=uo([c.stream(r=t),s.stream(t),f.stream(t)])},t.precision=function(t){return arguments.length?(c.precision(t),s.precision(t),f.precision(t),n()):c.precision()},t.scale=function(n){return arguments.length?(c.scale(n),s.scale(.35*n),f.scale(n),t.translate(c.translate())):c.scale()},t.translate=function(t){if(!arguments.length)return c.translate();var e=c.scale(),r=+t[0],a=+t[1];return i=c.translate(t).clipExtent([[r-.455*e,a-.238*e],[r+.455*e,a+.238*e]]).stream(l),o=s.translate([r-.307*e,a+.201*e]).clipExtent([[r-.425*e+v_,a+.12*e+v_],[r-.214*e-v_,a+.234*e-v_]]).stream(l),u=f.translate([r-.205*e,a+.212*e]).clipExtent([[r-.214*e+v_,a+.166*e+v_],[r-.115*e-v_,a+.234*e-v_]]).stream(l),n()},t.fitExtent=function(n,e){return Ji(t,n,e)},t.fitSize=function(n,e){return Qi(t,n,e)},t.scale(1070)},fy=ao(function(t){return P_(2/(1+t))});fy.invert=co(function(t){return 2*yr(t/2)});var ly=function(){return no(fy).scale(124.75).clipAngle(179.999)},hy=ao(function(t){return(t=gr(t))&&t/C_(t)});hy.invert=co(function(t){return t});var py=function(){return no(hy).scale(79.4188).clipAngle(179.999)};so.invert=function(t,n){return[t,2*M_(k_(n))-g_]};var dy=function(){return fo(so).scale(961/m_)},vy=function(){return ro(ho).scale(109.5).parallels([30,30])};po.invert=po;var _y=function(){return no(po).scale(152.63)},gy=function(){return ro(vo).scale(131.154).center([0,13.9389])};_o.invert=co(M_);var yy=function(){return no(_o).scale(144.049).clipAngle(60)},my=function(){function t(){return i=o=null,u}var n,e,r,i,o,u,a=1,c=0,s=0,f=1,l=1,h=Ng,p=null,d=Ng;return u={stream:function(t){return i&&o===t?i:i=h(d(o=t))},clipExtent:function(i){return arguments.length?(d=null==i?(p=n=e=r=null,Ng):fi(p=+i[0][0],n=+i[0][1],e=+i[1][0],r=+i[1][1]),t()):null==p?null:[[p,n],[e,r]]},scale:function(n){return 
arguments.length?(h=go((a=+n)*f,a*l,c,s),t()):a},translate:function(n){return arguments.length?(h=go(a*f,a*l,c=+n[0],s=+n[1]),t()):[c,s]},reflectX:function(n){return arguments.length?(h=go(a*(f=n?-1:1),a*l,c,s),t()):f<0},reflectY:function(n){return arguments.length?(h=go(a*f,a*(l=n?-1:1),c,s),t()):l<0},fitExtent:function(t,n){return Ji(u,t,n)},fitSize:function(t,n){return Qi(u,t,n)}}};yo.invert=co(yr);var xy=function(){return no(yo).scale(249.5).clipAngle(90+v_)};mo.invert=co(function(t){return 2*M_(t)});var by=function(){return no(mo).scale(250).clipAngle(142)};xo.invert=function(t,n){return[-n,2*M_(k_(t))-g_]};var wy=function(){var t=fo(xo),n=t.center,e=t.rotate;return t.center=function(t){return arguments.length?n([-t[1],t[0]]):(t=n(),[t[1],-t[0]])},t.rotate=function(t){return arguments.length?e([t[0],t[1],t.length>2?t[2]+90:90]):(t=e(),[t[0],t[1],t[2]-90])},e([0,0,90]).scale(159.155)},My=function(){function t(t){var o,u=0;t.eachAfter(function(t){var e=t.children;e?(t.x=wo(e),t.y=To(e)):(t.x=o?u+=n(t,o):0,t.y=0,o=t)});var a=No(t),c=ko(t),s=a.x-n(a,c)/2,f=c.x+n(c,a)/2;return t.eachAfter(i?function(n){n.x=(n.x-t.x)*e,n.y=(t.y-n.y)*r}:function(n){n.x=(n.x-s)/(f-s)*e,n.y=(1-(t.y?n.y/t.y:1))*r})}var n=bo,e=1,r=1,i=!1;return t.separation=function(e){return arguments.length?(n=e,t):n},t.size=function(n){return arguments.length?(i=!1,e=+n[0],r=+n[1],t):i?null:[e,r]},t.nodeSize=function(n){return arguments.length?(i=!0,e=+n[0],r=+n[1],t):i?[e,r]:null},t},Ty=function(){return this.eachAfter(Eo)},Sy=function(t){var n,e,r,i,o=this,u=[o];do{for(n=u.reverse(),u=[];o=n.pop();)if(t(o),e=o.children)for(r=0,i=e.length;r=0;--e)i.push(n[e]);return this},ky=function(t){for(var n,e,r,i=this,o=[i],u=[];i=o.pop();)if(u.push(i),n=i.children)for(e=0,r=n.length;e=0;)e+=r[i].value;n.value=e})},Ay=function(t){return this.eachBefore(function(n){n.children&&n.children.sort(t)})},Cy=function(t){for(var n=this,e=Ao(n,t),r=[n];n!==e;)n=n.parent,r.push(n);for(var 
i=r.length;t!==e;)r.splice(i,0,t),t=t.parent;return r},zy=function(){for(var t=this,n=[t];t=t.parent;)n.push(t);return n},Py=function(){var t=[];return this.each(function(n){t.push(n)}),t},Ly=function(){var t=[];return this.eachBefore(function(n){n.children||t.push(n)}),t},Ry=function(){var t=this,n=[];return t.each(function(e){e!==t&&n.push({source:e.parent,target:e})}),n};qo.prototype=Co.prototype={constructor:qo,count:Ty,each:Sy,eachAfter:ky,eachBefore:Ny,sum:Ey,sort:Ay,path:Cy,ancestors:zy,descendants:Py,leaves:Ly,links:Ry,copy:zo};var qy=function(t){for(var n=(t=t.slice()).length,e=null,r=e;n;){var i=new Uo(t[n-1]);r=r?r.next=i:e=i,t[void 0]=t[--n]}return{head:e,tail:r}},Uy=function(t){return Oo(qy(t),[])},Dy=function(t){return Vo(t),t},Oy=function(t){return function(){return t}},Fy=function(){function t(t){return t.x=e/2,t.y=r/2,n?t.eachBefore(Jo(n)).eachAfter(Qo(i,.5)).eachBefore(Ko(1)):t.eachBefore(Jo(Go)).eachAfter(Qo(Zo,1)).eachAfter(Qo(i,t.r/Math.min(e,r))).eachBefore(Ko(Math.min(e,r)/(2*t.r))),t}var n=null,e=1,r=1,i=Zo;return t.radius=function(e){return arguments.length?(n=$o(e),t):n},t.size=function(n){return arguments.length?(e=+n[0],r=+n[1],t):[e,r]},t.padding=function(n){return arguments.length?(i="function"==typeof n?n:Oy(+n),t):i},t},Iy=function(t){t.x0=Math.round(t.x0),t.y0=Math.round(t.y0),t.x1=Math.round(t.x1),t.y1=Math.round(t.y1)},Yy=function(t,n,e,r,i){for(var o,u=t.children,a=-1,c=u.length,s=t.value&&(r-n)/t.value;++a0)throw new Error("cycle");return o}var n=tu,e=nu;return t.id=function(e){return arguments.length?(n=Wo(e),t):n},t.parentId=function(n){return arguments.length?(e=Wo(n),t):e},t};cu.prototype=Object.create(qo.prototype);var $y=function(){function t(t){var r=su(t);if(r.eachAfter(n),r.parent.m=-r.z,r.eachBefore(e),c)t.eachBefore(i);else{var s=t,f=t,l=t;t.eachBefore(function(t){t.xf.x&&(f=t),t.depth>l.depth&&(l=t)});var 
h=s===f?1:o(s,f)/2,p=h-s.x,d=u/(f.x+h+p),v=a/(l.depth||1);t.eachBefore(function(t){t.x=(t.x+p)*d,t.y=t.depth*v})}return t}function n(t){var n=t.children,e=t.parent.children,i=t.i?e[t.i-1]:null;if(n){uu(t);var u=(n[0].z+n[n.length-1].z)/2;i?(t.z=i.z+o(t._,i._),t.m=t.z-u):t.z=u}else i&&(t.z=i.z+o(t._,i._));t.parent.A=r(t,i,t.parent.A||e[0])}function e(t){t._.x=t.z+t.parent.m,t.m+=t.parent.m}function r(t,n,e){if(n){for(var r,i=t,u=t,a=n,c=i.parent.children[0],s=i.m,f=u.m,l=a.m,h=c.m;a=iu(a),i=ru(i),a&&i;)c=ru(c),u=iu(u),u.a=t,r=a.z+l-i.z-s+o(a._,i._),r>0&&(ou(au(a,t,e),t,r),s+=r,f+=r),l+=a.m,s+=i.m,h+=c.m,f+=u.m;a&&!iu(u)&&(u.t=a,u.m+=l-f),i&&!ru(c)&&(c.t=i,c.m+=s-h,e=t)}return e}function i(t){t.x*=u,t.y=t.depth*a}var o=eu,u=1,a=1,c=null;return t.separation=function(n){return arguments.length?(o=n,t):o},t.size=function(n){return arguments.length?(c=!1,u=+n[0],a=+n[1],t):c?null:[u,a]},t.nodeSize=function(n){return arguments.length?(c=!0,u=+n[0],a=+n[1],t):c?[u,a]:null},t},Wy=function(t,n,e,r,i){for(var o,u=t.children,a=-1,c=u.length,s=t.value&&(i-e)/t.value;++a1?n:1)},e}(Zy),Jy=function(){function t(t){return t.x0=t.y0=0,t.x1=i,t.y1=o,t.eachBefore(n),u=[0],r&&t.eachBefore(Iy),t}function n(t){var n=u[t.depth],r=t.x0+n,i=t.y0+n,o=t.x1-n,h=t.y1-n;o=n-1){var s=c[t];return s.x0=r,s.y0=i,s.x1=u,s.y1=a,void 0}for(var l=f[t],h=e/2+l,p=t+1,d=n-1;p>>1;f[v]a-i){var y=(r*g+u*_)/e;o(t,p,_,r,i,y,a),o(p,n,g,y,i,u,a)}else{var m=(i*g+a*_)/e;o(t,p,_,r,i,u,m),o(p,n,g,r,m,u,a)}}var u,a,c=t.children,s=c.length,f=new Array(s+1);for(f[0]=a=u=0;u1?n:1)},e}(Zy),nm=function(t){for(var n,e=-1,r=t.length,i=t[r-1],o=0;++e=0;--n)s.push(t[r[o[n]][2]]);for(n=+a;na!=s>a&&u<(c-e)*(a-r)/(s-r)+e&&(f=!f),c=e,s=r;return f},um=function(t){for(var n,e,r=-1,i=t.length,o=t[i-1],u=o[0],a=o[1],c=0;++r1);return t+n*i*Math.sqrt(-2*Math.log(r)/r)}},lm=function(){var t=fm.apply(this,arguments);return function(){return Math.exp(t())}},hm=function(t){return function(){for(var 
n=0,e=0;e=200&&e<300||304===e){if(o)try{n=o.call(r,s)}catch(t){return void a.call("error",r,t)}else n=s;a.call("load",r,n)}else a.call("error",r,t)}var r,i,o,u,a=v("beforesend","progress","load","error"),c=Be(),s=new XMLHttpRequest,f=null,l=null,h=0;if("undefined"==typeof XDomainRequest||"withCredentials"in s||!/^(http(s)?:)?\/\//.test(t)||(s=new XDomainRequest),"onload"in s?s.onload=s.onerror=s.ontimeout=e:s.onreadystatechange=function(t){s.readyState>3&&e(t)},s.onprogress=function(t){a.call("progress",r,t)},r={header:function(t,n){return t=(t+"").toLowerCase(),arguments.length<2?c.get(t):(null==n?c.remove(t):c.set(t,n+""),r)},mimeType:function(t){return arguments.length?(i=null==t?null:t+"",r):i},responseType:function(t){return arguments.length?(u=t,r):u},timeout:function(t){return arguments.length?(h=+t,r):h},user:function(t){return arguments.length<1?f:(f=null==t?null:t+"",r)},password:function(t){return arguments.length<1?l:(l=null==t?null:t+"",r)},response:function(t){return o=t,r},get:function(t,n){return r.send("GET",t,n)},post:function(t,n){return r.send("POST",t,n)},send:function(n,e,o){return s.open(n,t,!0,f,l),null==i||c.has("accept")||c.set("accept",i+",*/*"),s.setRequestHeader&&c.each(function(t,n){s.setRequestHeader(n,t)}),null!=i&&s.overrideMimeType&&s.overrideMimeType(i),null!=u&&(s.responseType=u),h>0&&(s.timeout=h),null==o&&"function"==typeof e&&(o=e,e=null),null!=o&&1===o.length&&(o=xu(o)),null!=o&&r.on("error",o).on("load",function(t){o(null,t)}),a.call("beforesend",r,s),s.send(null==e?null:e),r},abort:function(){return s.abort(),r},on:function(){var t=a.on.apply(a,arguments);return t===a?r:t}},null!=n){if("function"!=typeof n)throw new Error("invalid callback: "+n);return r.get(n)}return r},_m=function(t,n){return function(e,r){var i=vm(e).mimeType(t).response(n);if(null!=r){if("function"!=typeof r)throw new Error("invalid callback: "+r);return i.get(r)}return i}},gm=_m("text/html",function(t){return 
document.createRange().createContextualFragment(t.responseText)}),ym=_m("application/json",function(t){return JSON.parse(t.responseText)}),mm=_m("text/plain",function(t){return t.responseText}),xm=_m("application/xml",function(t){var n=t.responseXML;if(!n)throw new Error("parse error");return n}),bm=function(t,n){return function(e,r,i){arguments.length<3&&(i=r,r=null);var o=vm(e).mimeType(t);return o.row=function(t){return arguments.length?o.response(wu(n,r=t)):r},o.row(r),i?o.get(i):o}},wm=bm("text/csv",Fd),Mm=bm("text/tab-separated-values",Hd),Tm=Array.prototype,Sm=Tm.map,Nm=Tm.slice,km={name:"implicit"},Em=function(t){return function(){return t}},Am=function(t){return+t},Cm=[0,1],zm=function(n,e,r){var o,u=n[0],a=n[n.length-1],c=i(u,a,null==e?10:e);switch(r=hr(null==r?",f":r),r.type){case"s":var s=Math.max(Math.abs(u),Math.abs(a));return null!=r.precision||isNaN(o=Rv(c,s))||(r.precision=o),t.formatPrefix(r,s);case"":case"e":case"g":case"p":case"r":null!=r.precision||isNaN(o=qv(c,Math.max(Math.abs(u),Math.abs(a))))||(r.precision=o-("e"===r.type));break;case"f":case"%":null!=r.precision||isNaN(o=Lv(c))||(r.precision=o-2*("%"===r.type))}return t.format(r)},Pm=function(t,n){t=t.slice();var e,r=0,i=t.length-1,o=t[r],u=t[i];return u0?t>1?Gu(function(n){n.setTime(Math.floor(n/t)*t)},function(n,e){n.setTime(+n+e*t)},function(n,e){return(e-n)/t}):qm:null};var Um=qm.range,Dm=6e4,Om=6048e5,Fm=Gu(function(t){t.setTime(1e3*Math.floor(t/1e3))},function(t,n){t.setTime(+t+1e3*n)},function(t,n){return(n-t)/1e3},function(t){return t.getUTCSeconds()}),Im=Fm.range,Ym=Gu(function(t){t.setTime(Math.floor(t/Dm)*Dm)},function(t,n){t.setTime(+t+n*Dm)},function(t,n){return(n-t)/Dm},function(t){return t.getMinutes()}),Bm=Ym.range,jm=Gu(function(t){var n=t.getTimezoneOffset()*Dm%36e5;n<0&&(n+=36e5),t.setTime(36e5*Math.floor((+t-n)/36e5)+n)},function(t,n){t.setTime(+t+36e5*n)},function(t,n){return(n-t)/36e5},function(t){return 
t.getHours()}),Hm=jm.range,Xm=Gu(function(t){t.setHours(0,0,0,0)},function(t,n){t.setDate(t.getDate()+n)},function(t,n){return(n-t-(n.getTimezoneOffset()-t.getTimezoneOffset())*Dm)/864e5},function(t){return t.getDate()-1}),Vm=Xm.range,$m=Ju(0),Wm=Ju(1),Zm=Ju(2),Gm=Ju(3),Jm=Ju(4),Qm=Ju(5),Km=Ju(6),tx=$m.range,nx=Wm.range,ex=Zm.range,rx=Gm.range,ix=Jm.range,ox=Qm.range,ux=Km.range,ax=Gu(function(t){t.setDate(1),t.setHours(0,0,0,0)},function(t,n){t.setMonth(t.getMonth()+n)},function(t,n){return n.getMonth()-t.getMonth()+12*(n.getFullYear()-t.getFullYear())},function(t){return t.getMonth()}),cx=ax.range,sx=Gu(function(t){t.setMonth(0,1),t.setHours(0,0,0,0)},function(t,n){t.setFullYear(t.getFullYear()+n)},function(t,n){return n.getFullYear()-t.getFullYear()},function(t){return t.getFullYear()});sx.every=function(t){return isFinite(t=Math.floor(t))&&t>0?Gu(function(n){n.setFullYear(Math.floor(n.getFullYear()/t)*t),n.setMonth(0,1),n.setHours(0,0,0,0)},function(n,e){n.setFullYear(n.getFullYear()+e*t)}):null};var fx=sx.range,lx=Gu(function(t){t.setUTCSeconds(0,0)},function(t,n){t.setTime(+t+n*Dm)},function(t,n){return(n-t)/Dm},function(t){return t.getUTCMinutes()}),hx=lx.range,px=Gu(function(t){t.setUTCMinutes(0,0,0)},function(t,n){t.setTime(+t+36e5*n)},function(t,n){return(n-t)/36e5},function(t){return t.getUTCHours()}),dx=px.range,vx=Gu(function(t){t.setUTCHours(0,0,0,0)},function(t,n){t.setUTCDate(t.getUTCDate()+n)},function(t,n){return(n-t)/864e5},function(t){return t.getUTCDate()-1}),_x=vx.range,gx=Qu(0),yx=Qu(1),mx=Qu(2),xx=Qu(3),bx=Qu(4),wx=Qu(5),Mx=Qu(6),Tx=gx.range,Sx=yx.range,Nx=mx.range,kx=xx.range,Ex=bx.range,Ax=wx.range,Cx=Mx.range,zx=Gu(function(t){t.setUTCDate(1),t.setUTCHours(0,0,0,0)},function(t,n){t.setUTCMonth(t.getUTCMonth()+n)},function(t,n){return n.getUTCMonth()-t.getUTCMonth()+12*(n.getUTCFullYear()-t.getUTCFullYear())},function(t){return 
t.getUTCMonth()}),Px=zx.range,Lx=Gu(function(t){t.setUTCMonth(0,1),t.setUTCHours(0,0,0,0)},function(t,n){t.setUTCFullYear(t.getUTCFullYear()+n)},function(t,n){return n.getUTCFullYear()-t.getUTCFullYear()},function(t){return t.getUTCFullYear()});Lx.every=function(t){return isFinite(t=Math.floor(t))&&t>0?Gu(function(n){n.setUTCFullYear(Math.floor(n.getUTCFullYear()/t)*t),n.setUTCMonth(0,1),n.setUTCHours(0,0,0,0)},function(n,e){n.setUTCFullYear(n.getUTCFullYear()+e*t)}):null};var Rx,qx=Lx.range,Ux={"-":"",_:" ",0:"0"},Dx=/^\s*\d+/,Ox=/^%/,Fx=/[\\\^\$\*\+\?\|\[\]\(\)\.\{\}]/g;Ga({dateTime:"%x, %X",date:"%-m/%-d/%Y",time:"%-I:%M:%S %p",periods:["AM","PM"],days:["Sunday","Monday","Tuesday","Wednesday","Thursday","Friday","Saturday"],shortDays:["Sun","Mon","Tue","Wed","Thu","Fri","Sat"],months:["January","February","March","April","May","June","July","August","September","October","November","December"],shortMonths:["Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"]});var Ix=Date.prototype.toISOString?Ja:t.utcFormat("%Y-%m-%dT%H:%M:%S.%LZ"),Yx=+new Date("2000-01-01T00:00:00.000Z")?Qa:t.utcParse("%Y-%m-%dT%H:%M:%S.%LZ"),Bx=1e3,jx=60*Bx,Hx=60*jx,Xx=24*Hx,Vx=7*Xx,$x=30*Xx,Wx=365*Xx,Zx=function(){return nc(sx,ax,$m,Xm,jm,Ym,Fm,qm,t.timeFormat).domain([new Date(2e3,0,1),new Date(2e3,0,2)])},Gx=function(){return nc(Lx,zx,gx,vx,px,lx,Fm,qm,t.utcFormat).domain([Date.UTC(2e3,0,1),Date.UTC(2e3,0,2)])},Jx=function(t){return 
t.match(/.{6}/g).map(function(t){return"#"+t})},Qx=Jx("1f77b4ff7f0e2ca02cd627289467bd8c564be377c27f7f7fbcbd2217becf"),Kx=Jx("393b795254a36b6ecf9c9ede6379398ca252b5cf6bcedb9c8c6d31bd9e39e7ba52e7cb94843c39ad494ad6616be7969c7b4173a55194ce6dbdde9ed6"),tb=Jx("3182bd6baed69ecae1c6dbefe6550dfd8d3cfdae6bfdd0a231a35474c476a1d99bc7e9c0756bb19e9ac8bcbddcdadaeb636363969696bdbdbdd9d9d9"),nb=Jx("1f77b4aec7e8ff7f0effbb782ca02c98df8ad62728ff98969467bdc5b0d58c564bc49c94e377c2f7b6d27f7f7fc7c7c7bcbd22dbdb8d17becf9edae5"),eb=Fh($t(300,.5,0),$t(-240,.5,1)),rb=Fh($t(-100,.75,.35),$t(80,1.5,.8)),ib=Fh($t(260,.75,.35),$t(80,1.5,.8)),ob=$t(),ub=function(t){(t<0||t>1)&&(t-=Math.floor(t));var n=Math.abs(t-.5);return ob.h=360*t-100,ob.s=1.5-1.5*n,ob.l=.8-.9*n,ob+""},ab=ec(Jx("44015444025645045745055946075a46085c460a5d460b5e470d60470e6147106347116447136548146748166848176948186a481a6c481b6d481c6e481d6f481f70482071482173482374482475482576482677482878482979472a7a472c7a472d7b472e7c472f7d46307e46327e46337f463480453581453781453882443983443a83443b84433d84433e85423f854240864241864142874144874045884046883f47883f48893e49893e4a893e4c8a3d4d8a3d4e8a3c4f8a3c508b3b518b3b528b3a538b3a548c39558c39568c38588c38598c375a8c375b8d365c8d365d8d355e8d355f8d34608d34618d33628d33638d32648e32658e31668e31678e31688e30698e306a8e2f6b8e2f6c8e2e6d8e2e6e8e2e6f8e2d708e2d718e2c718e2c728e2c738e2b748e2b758e2a768e2a778e2a788e29798e297a8e297b8e287c8e287d8e277e8e277f8e27808e26818e26828e26828e25838e25848e25858e24868e24878e23888e23898e238a8d228b8d228c8d228d8d218e8d218f8d21908d21918c20928c20928c20938c1f948c1f958b1f968b1f978b1f988b1f998a1f9a8a1e9b8a1e9c891e9d891f9e891f9f881fa0881fa1881fa1871fa28720a38620a48621a58521a68522a78522a88423a98324aa8325ab8225ac8226ad8127ad8128ae8029af7f2ab07f2cb17e2db27d2eb37c2fb47c31b57b32b67a34b67935b77937b87838b9773aba763bbb753dbc743fbc7340bd7242be7144bf7046c06f48c16e4ac16d4cc26c4ec36b50c46a52c56954c56856c66758c7655ac8645cc8635ec96260ca6063cb5f65cb5e67cc5c69cd5b6ccd5a6ece5870cf5773d05675d05477d1537ad1517cd2507fd34
e81d34d84d44b86d54989d5488bd6468ed64590d74393d74195d84098d83e9bd93c9dd93ba0da39a2da37a5db36a8db34aadc32addc30b0dd2fb2dd2db5de2bb8de29bade28bddf26c0df25c2df23c5e021c8e020cae11fcde11dd0e11cd2e21bd5e21ad8e219dae319dde318dfe318e2e418e5e419e7e419eae51aece51befe51cf1e51df4e61ef6e620f8e621fbe723fde725")),cb=ec(Jx("00000401000501010601010802010902020b02020d03030f03031204041405041606051806051a07061c08071e0907200a08220b09240c09260d0a290e0b2b100b2d110c2f120d31130d34140e36150e38160f3b180f3d19103f1a10421c10441d11471e114920114b21114e22115024125325125527125829115a2a115c2c115f2d11612f116331116533106734106936106b38106c390f6e3b0f703d0f713f0f72400f74420f75440f764510774710784910784a10794c117a4e117b4f127b51127c52137c54137d56147d57157e59157e5a167e5c167f5d177f5f187f601880621980641a80651a80671b80681c816a1c816b1d816d1d816e1e81701f81721f817320817521817621817822817922827b23827c23827e24828025828125818326818426818627818827818928818b29818c29818e2a81902a81912b81932b80942c80962c80982d80992d809b2e7f9c2e7f9e2f7fa02f7fa1307ea3307ea5317ea6317da8327daa337dab337cad347cae347bb0357bb2357bb3367ab5367ab73779b83779ba3878bc3978bd3977bf3a77c03a76c23b75c43c75c53c74c73d73c83e73ca3e72cc3f71cd4071cf4070d0416fd2426fd3436ed5446dd6456cd8456cd9466bdb476adc4869de4968df4a68e04c67e24d66e34e65e44f64e55064e75263e85362e95462ea5661eb5760ec5860ed5a5fee5b5eef5d5ef05f5ef1605df2625df2645cf3655cf4675cf4695cf56b5cf66c5cf66e5cf7705cf7725cf8745cf8765cf9785df9795df97b5dfa7d5efa7f5efa815ffb835ffb8560fb8761fc8961fc8a62fc8c63fc8e64fc9065fd9266fd9467fd9668fd9869fd9a6afd9b6bfe9d6cfe9f6dfea16efea36ffea571fea772fea973feaa74feac76feae77feb078feb27afeb47bfeb67cfeb77efeb97ffebb81febd82febf84fec185fec287fec488fec68afec88cfeca8dfecc8ffecd90fecf92fed194fed395fed597fed799fed89afdda9cfddc9efddea0fde0a1fde2a3fde3a5fde5a7fde7a9fde9aafdebacfcecaefceeb0fcf0b2fcf2b4fcf4b6fcf6b8fcf7b9fcf9bbfcfbbdfcfdbf")),sb=ec(Jx("00000401000501010601010802010a02020c02020e03021004031204031405041706041907051b08051d09061f0a07220b07240c08260d08290e092b10092d110a30120a32140
b34150b37160b39180c3c190c3e1b0c411c0c431e0c451f0c48210c4a230c4c240c4f260c51280b53290b552b0b572d0b592f0a5b310a5c320a5e340a5f3609613809623909633b09643d09653e0966400a67420a68440a68450a69470b6a490b6a4a0c6b4c0c6b4d0d6c4f0d6c510e6c520e6d540f6d550f6d57106e59106e5a116e5c126e5d126e5f136e61136e62146e64156e65156e67166e69166e6a176e6c186e6d186e6f196e71196e721a6e741a6e751b6e771c6d781c6d7a1d6d7c1d6d7d1e6d7f1e6c801f6c82206c84206b85216b87216b88226a8a226a8c23698d23698f24699025689225689326679526679727669827669a28659b29649d29649f2a63a02a63a22b62a32c61a52c60a62d60a82e5fa92e5eab2f5ead305dae305cb0315bb1325ab3325ab43359b63458b73557b93556ba3655bc3754bd3853bf3952c03a51c13a50c33b4fc43c4ec63d4dc73e4cc83f4bca404acb4149cc4248ce4347cf4446d04545d24644d34743d44842d54a41d74b3fd84c3ed94d3dda4e3cdb503bdd513ade5238df5337e05536e15635e25734e35933e45a31e55c30e65d2fe75e2ee8602de9612bea632aeb6429eb6628ec6726ed6925ee6a24ef6c23ef6e21f06f20f1711ff1731df2741cf3761bf37819f47918f57b17f57d15f67e14f68013f78212f78410f8850ff8870ef8890cf98b0bf98c0af98e09fa9008fa9207fa9407fb9606fb9706fb9906fb9b06fb9d07fc9f07fca108fca309fca50afca60cfca80dfcaa0ffcac11fcae12fcb014fcb216fcb418fbb61afbb81dfbba1ffbbc21fbbe23fac026fac228fac42afac62df9c72ff9c932f9cb35f8cd37f8cf3af7d13df7d340f6d543f6d746f5d949f5db4cf4dd4ff4df53f4e156f3e35af3e55df2e661f2e865f2ea69f1ec6df1ed71f1ef75f1f179f2f27df2f482f3f586f3f68af4f88ef5f992f6fa96f8fb9af9fc9dfafda1fcffa4")),fb=ec(Jx("0d088710078813078916078a19068c1b068d1d068e20068f2206902406912605912805922a05932c05942e05952f059631059733059735049837049938049a3a049a3c049b3e049c3f049c41049d43039e44039e46039f48039f4903a04b03a14c02a14e02a25002a25102a35302a35502a45601a45801a45901a55b01a55c01a65e01a66001a66100a76300a76400a76600a76700a86900a86a00a86c00a86e00a86f00a87100a87201a87401a87501a87701a87801a87a02a87b02a87d03a87e03a88004a88104a78305a78405a78606a68707a68808a68a09a58b0aa58d0ba58e0ca48f0da4910ea3920fa39410a29511a19613a19814a099159f9a169f9c179e9d189d9e199da01a9ca11b9ba21d9aa31e9aa51f99a62098a72197a82296aa2395ab2494ac2
694ad2793ae2892b02991b12a90b22b8fb32c8eb42e8db52f8cb6308bb7318ab83289ba3388bb3488bc3587bd3786be3885bf3984c03a83c13b82c23c81c33d80c43e7fc5407ec6417dc7427cc8437bc9447aca457acb4679cc4778cc4977cd4a76ce4b75cf4c74d04d73d14e72d24f71d35171d45270d5536fd5546ed6556dd7566cd8576bd9586ada5a6ada5b69db5c68dc5d67dd5e66de5f65de6164df6263e06363e16462e26561e26660e3685fe4695ee56a5de56b5de66c5ce76e5be76f5ae87059e97158e97257ea7457eb7556eb7655ec7754ed7953ed7a52ee7b51ef7c51ef7e50f07f4ff0804ef1814df1834cf2844bf3854bf3874af48849f48948f58b47f58c46f68d45f68f44f79044f79143f79342f89441f89540f9973ff9983ef99a3efa9b3dfa9c3cfa9e3bfb9f3afba139fba238fca338fca537fca636fca835fca934fdab33fdac33fdae32fdaf31fdb130fdb22ffdb42ffdb52efeb72dfeb82cfeba2cfebb2bfebd2afebe2afec029fdc229fdc328fdc527fdc627fdc827fdca26fdcb26fccd25fcce25fcd025fcd225fbd324fbd524fbd724fad824fada24f9dc24f9dd25f8df25f8e125f7e225f7e425f6e626f6e826f5e926f5eb27f4ed27f3ee27f3f027f2f227f1f426f1f525f0f724f0f921")),lb=function(t){return function(){return t}},hb=Math.abs,pb=Math.atan2,db=Math.cos,vb=Math.max,_b=Math.min,gb=Math.sin,yb=Math.sqrt,mb=1e-12,xb=Math.PI,bb=xb/2,wb=2*xb,Mb=function(){function t(){var t,s,f=+n.apply(this,arguments),l=+e.apply(this,arguments),h=o.apply(this,arguments)-bb,p=u.apply(this,arguments)-bb,d=hb(p-h),v=p>h;if(c||(c=t=qe()),lmb)if(d>wb-mb)c.moveTo(l*db(h),l*gb(h)),c.arc(0,0,l,h,p,!v),f>mb&&(c.moveTo(f*db(p),f*gb(p)),c.arc(0,0,f,p,h,v));else{var _,g,y=h,m=p,x=h,b=p,w=d,M=d,T=a.apply(this,arguments)/2,S=T>mb&&(i?+i.apply(this,arguments):yb(f*f+l*l)),N=_b(hb(l-f)/2,+r.apply(this,arguments)),k=N,E=N;if(S>mb){var A=oc(S/f*gb(T)),C=oc(S/l*gb(T));(w-=2*A)>mb?(A*=v?1:-1,x+=A,b-=A):(w=0,x=b=(h+p)/2),(M-=2*C)>mb?(C*=v?1:-1,y+=C,m-=C):(M=0,y=m=(h+p)/2)}var z=l*db(y),P=l*gb(y),L=f*db(b),R=f*gb(b);if(N>mb){var 
q=l*db(m),U=l*gb(m),D=f*db(x),O=f*gb(x);if(dmb?lc(z,P,D,O,q,U,L,R):[L,R],I=z-F[0],Y=P-F[1],B=q-F[0],j=U-F[1],H=1/gb(ic((I*B+Y*j)/(yb(I*I+Y*Y)*yb(B*B+j*j)))/2),X=yb(F[0]*F[0]+F[1]*F[1]);k=_b(N,(f-X)/(H-1)),E=_b(N,(l-X)/(H+1))}}M>mb?E>mb?(_=hc(D,O,z,P,l,E,v),g=hc(q,U,L,R,l,E,v),c.moveTo(_.cx+_.x01,_.cy+_.y01),Emb&&w>mb?k>mb?(_=hc(L,R,q,U,f,-k,v),g=hc(z,P,D,O,f,-k,v),c.lineTo(_.cx+_.x01,_.cy+_.y01),k=f;--l)s.point(_[l],g[l]);s.lineEnd(),s.areaEnd()}v&&(_[n]=+e(h,n,t),g[n]=+i(h,n,t),s.point(r?+r(h,n,t):_[n],o?+o(h,n,t):g[n]))}if(p)return s=null,p+""||null}function n(){return Sb().defined(u).curve(c).context(a)}var e=dc,r=null,i=lb(0),o=vc,u=lb(!0),a=null,c=Tb,s=null;return t.x=function(n){return arguments.length?(e="function"==typeof n?n:lb(+n),r=null,t):e},t.x0=function(n){return arguments.length?(e="function"==typeof n?n:lb(+n),t):e},t.x1=function(n){return arguments.length?(r=null==n?null:"function"==typeof n?n:lb(+n),t):r},t.y=function(n){return arguments.length?(i="function"==typeof n?n:lb(+n),o=null,t):i},t.y0=function(n){return arguments.length?(i="function"==typeof n?n:lb(+n),t):i},t.y1=function(n){return arguments.length?(o=null==n?null:"function"==typeof n?n:lb(+n),t):o},t.lineX0=t.lineY0=function(){return n().x(e).y(i)},t.lineY1=function(){return n().x(e).y(o)},t.lineX1=function(){return n().x(r).y(i)},t.defined=function(n){return arguments.length?(u="function"==typeof n?n:lb(!!n),t):u},t.curve=function(n){return arguments.length?(c=n,null!=a&&(s=c(a)),t):c},t.context=function(n){return arguments.length?(null==n?a=s=null:s=c(a=n),t):a},t},kb=function(t,n){return nt?1:n>=t?0:NaN},Eb=function(t){return t},Ab=function(){function t(t){var a,c,s,f,l,h=t.length,p=0,d=new Array(h),v=new Array(h),_=+i.apply(this,arguments),g=Math.min(wb,Math.max(-wb,o.apply(this,arguments)-_)),y=Math.min(Math.abs(g)/h,u.apply(this,arguments)),m=y*(g<0?-1:1);for(a=0;a0&&(p+=l);for(null!=e?d.sort(function(t,n){return e(v[t],v[n])}):null!=r&&d.sort(function(n,e){return 
r(t[n],t[e])}),a=0,s=p?(g-h*m)/p:0;a0?l*s:0)+m,v[c]={data:t[c],index:a,value:l,startAngle:_,endAngle:f,padAngle:y};return v}var n=Eb,e=kb,r=null,i=lb(0),o=lb(wb),u=lb(0);return t.value=function(e){return arguments.length?(n="function"==typeof e?e:lb(+e),t):n},t.sortValues=function(n){return arguments.length?(e=n,r=null,t):e},t.sort=function(n){return arguments.length?(r=n,e=null,t):r},t.startAngle=function(n){return arguments.length?(i="function"==typeof n?n:lb(+n),t):i},t.endAngle=function(n){return arguments.length?(o="function"==typeof n?n:lb(+n),t):o},t.padAngle=function(n){return arguments.length?(u="function"==typeof n?n:lb(+n),t):u},t},Cb=gc(Tb);_c.prototype={areaStart:function(){this._curve.areaStart()},areaEnd:function(){this._curve.areaEnd()},lineStart:function(){this._curve.lineStart()},lineEnd:function(){this._curve.lineEnd()},point:function(t,n){this._curve.point(n*Math.sin(t),n*-Math.cos(t))}};var zb=function(){return yc(Sb().curve(Cb))},Pb=function(){var t=Nb().curve(Cb),n=t.curve,e=t.lineX0,r=t.lineX1,i=t.lineY0,o=t.lineY1;return t.angle=t.x,delete t.x,t.startAngle=t.x0,delete t.x0,t.endAngle=t.x1,delete t.x1,t.radius=t.y,delete t.y,t.innerRadius=t.y0,delete t.y0,t.outerRadius=t.y1,delete t.y1,t.lineStartAngle=function(){return yc(e())},delete t.lineX0,t.lineEndAngle=function(){return yc(r())},delete t.lineX1,t.lineInnerRadius=function(){return yc(i())},delete t.lineY0,t.lineOuterRadius=function(){return yc(o())},delete t.lineY1,t.curve=function(t){return arguments.length?n(gc(t)):n()._curve},t},Lb={draw:function(t,n){var e=Math.sqrt(n/xb);t.moveTo(e,0),t.arc(0,0,e,0,wb)}},Rb={draw:function(t,n){var e=Math.sqrt(n/5)/2;t.moveTo(-3*e,-e),t.lineTo(-e,-e),t.lineTo(-e,-3*e),t.lineTo(e,-3*e),t.lineTo(e,-e),t.lineTo(3*e,-e),t.lineTo(3*e,e),t.lineTo(e,e),t.lineTo(e,3*e),t.lineTo(-e,3*e),t.lineTo(-e,e),t.lineTo(-3*e,e),t.closePath()}},qb=Math.sqrt(1/3),Ub=2*qb,Db={draw:function(t,n){var 
e=Math.sqrt(n/Ub),r=e*qb;t.moveTo(0,-e),t.lineTo(r,0),t.lineTo(0,e),t.lineTo(-r,0),t.closePath()}},Ob=Math.sin(xb/10)/Math.sin(7*xb/10),Fb=Math.sin(wb/10)*Ob,Ib=-Math.cos(wb/10)*Ob,Yb={draw:function(t,n){var e=Math.sqrt(.8908130915292852*n),r=Fb*e,i=Ib*e;t.moveTo(0,-e),t.lineTo(r,i);for(var o=1;o<5;++o){var u=wb*o/5,a=Math.cos(u),c=Math.sin(u);t.lineTo(c*e,-a*e),t.lineTo(a*r-c*i,c*r+a*i)}t.closePath()}},Bb={draw:function(t,n){var e=Math.sqrt(n),r=-e/2;t.rect(r,r,e,e)}},jb=Math.sqrt(3),Hb={draw:function(t,n){var e=-Math.sqrt(n/(3*jb));t.moveTo(0,2*e),t.lineTo(-jb*e,-e),t.lineTo(jb*e,-e),t.closePath()}},Xb=-.5,Vb=Math.sqrt(3)/2,$b=1/Math.sqrt(12),Wb=3*($b/2+1),Zb={draw:function(t,n){var e=Math.sqrt(n/Wb),r=e/2,i=e*$b,o=r,u=e*$b+e,a=-o,c=u;t.moveTo(r,i),t.lineTo(o,u),t.lineTo(a,c),t.lineTo(Xb*r-Vb*i,Vb*r+Xb*i),t.lineTo(Xb*o-Vb*u,Vb*o+Xb*u),t.lineTo(Xb*a-Vb*c,Vb*a+Xb*c),t.lineTo(Xb*r+Vb*i,Xb*i-Vb*r),t.lineTo(Xb*o+Vb*u,Xb*u-Vb*o),t.lineTo(Xb*a+Vb*c,Xb*c-Vb*a),t.closePath()}},Gb=[Lb,Rb,Db,Bb,Yb,Hb,Zb],Jb=function(){function t(){var t;if(r||(r=t=qe()),n.apply(this,arguments).draw(r,+e.apply(this,arguments)),t)return r=null,t+""||null}var n=lb(Lb),e=lb(64),r=null;return t.type=function(e){return arguments.length?(n="function"==typeof e?e:lb(e),t):n},t.size=function(n){return arguments.length?(e="function"==typeof n?n:lb(+n),t):e},t.context=function(n){return arguments.length?(r=null==n?null:n,t):r},t},Qb=function(){};xc.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x0=this._x1=this._y0=this._y1=NaN,this._point=0},lineEnd:function(){switch(this._point){case 3:mc(this,this._x1,this._y1);case 2:this._context.lineTo(this._x1,this._y1)}(this._line||0!==this._line&&1===this._point)&&this._context.closePath(),this._line=1-this._line},point:function(t,n){switch(t=+t,n=+n,this._point){case 0:this._point=1,this._line?this._context.lineTo(t,n):this._context.moveTo(t,n);break;case 1:this._point=2;break;case 
2:this._point=3,this._context.lineTo((5*this._x0+this._x1)/6,(5*this._y0+this._y1)/6);default:mc(this,t,n)}this._x0=this._x1,this._x1=t,this._y0=this._y1,this._y1=n}};var Kb=function(t){return new xc(t)};bc.prototype={areaStart:Qb,areaEnd:Qb,lineStart:function(){this._x0=this._x1=this._x2=this._x3=this._x4=this._y0=this._y1=this._y2=this._y3=this._y4=NaN,this._point=0},lineEnd:function(){switch(this._point){case 1:this._context.moveTo(this._x2,this._y2),this._context.closePath();break;case 2:this._context.moveTo((this._x2+2*this._x3)/3,(this._y2+2*this._y3)/3),this._context.lineTo((this._x3+2*this._x2)/3,(this._y3+2*this._y2)/3),this._context.closePath();break;case 3:this.point(this._x2,this._y2),this.point(this._x3,this._y3),this.point(this._x4,this._y4)}},point:function(t,n){switch(t=+t,n=+n,this._point){case 0:this._point=1,this._x2=t,this._y2=n;break;case 1:this._point=2,this._x3=t,this._y3=n;break;case 2:this._point=3,this._x4=t,this._y4=n,this._context.moveTo((this._x0+4*this._x1+t)/6,(this._y0+4*this._y1+n)/6);break;default:mc(this,t,n)}this._x0=this._x1,this._x1=t,this._y0=this._y1,this._y1=n}};var tw=function(t){return new bc(t)};wc.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x0=this._x1=this._y0=this._y1=NaN,this._point=0},lineEnd:function(){(this._line||0!==this._line&&3===this._point)&&this._context.closePath(),this._line=1-this._line},point:function(t,n){switch(t=+t,n=+n,this._point){case 0:this._point=1;break;case 1:this._point=2;break;case 2:this._point=3;var e=(this._x0+4*this._x1+t)/6,r=(this._y0+4*this._y1+n)/6;this._line?this._context.lineTo(e,r):this._context.moveTo(e,r);break;case 3:this._point=4;default:mc(this,t,n)}this._x0=this._x1,this._x1=t,this._y0=this._y1,this._y1=n}};var nw=function(t){return new wc(t)};Mc.prototype={lineStart:function(){this._x=[],this._y=[],this._basis.lineStart()},lineEnd:function(){var t=this._x,n=this._y,e=t.length-1;if(e>0)for(var 
r,i=t[0],o=n[0],u=t[e]-i,a=n[e]-o,c=-1;++c<=e;)r=c/e,this._basis.point(this._beta*t[c]+(1-this._beta)*(i+r*u),this._beta*n[c]+(1-this._beta)*(o+r*a));this._x=this._y=null,this._basis.lineEnd()},point:function(t,n){this._x.push(+t),this._y.push(+n)}};var ew=function t(n){function e(t){return 1===n?new xc(t):new Mc(t,n)}return e.beta=function(n){return t(+n)},e}(.85);Sc.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x0=this._x1=this._x2=this._y0=this._y1=this._y2=NaN,this._point=0},lineEnd:function(){switch(this._point){case 2:this._context.lineTo(this._x2,this._y2);break;case 3:Tc(this,this._x1,this._y1)}(this._line||0!==this._line&&1===this._point)&&this._context.closePath(),this._line=1-this._line},point:function(t,n){switch(t=+t,n=+n,this._point){case 0:this._point=1,this._line?this._context.lineTo(t,n):this._context.moveTo(t,n);break;case 1:this._point=2,this._x1=t,this._y1=n;break;case 2:this._point=3;default:Tc(this,t,n)}this._x0=this._x1,this._x1=this._x2,this._x2=t,this._y0=this._y1,this._y1=this._y2,this._y2=n}};var rw=function t(n){function e(t){return new Sc(t,n)}return e.tension=function(n){return t(+n)},e}(0);Nc.prototype={areaStart:Qb,areaEnd:Qb,lineStart:function(){this._x0=this._x1=this._x2=this._x3=this._x4=this._x5=this._y0=this._y1=this._y2=this._y3=this._y4=this._y5=NaN,this._point=0},lineEnd:function(){switch(this._point){case 1:this._context.moveTo(this._x3,this._y3),this._context.closePath();break;case 2:this._context.lineTo(this._x3,this._y3),this._context.closePath();break;case 3:this.point(this._x3,this._y3),this.point(this._x4,this._y4),this.point(this._x5,this._y5)}},point:function(t,n){switch(t=+t,n=+n,this._point){case 0:this._point=1,this._x3=t,this._y3=n;break;case 1:this._point=2,this._context.moveTo(this._x4=t,this._y4=n);break;case 
2:this._point=3,this._x5=t,this._y5=n;break;default:Tc(this,t,n)}this._x0=this._x1,this._x1=this._x2,this._x2=t,this._y0=this._y1,this._y1=this._y2,this._y2=n}};var iw=function t(n){function e(t){return new Nc(t,n)}return e.tension=function(n){return t(+n)},e}(0);kc.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x0=this._x1=this._x2=this._y0=this._y1=this._y2=NaN,this._point=0},lineEnd:function(){(this._line||0!==this._line&&3===this._point)&&this._context.closePath(),this._line=1-this._line},point:function(t,n){switch(t=+t,n=+n,this._point){case 0:this._point=1;break;case 1:this._point=2;break;case 2:this._point=3,this._line?this._context.lineTo(this._x2,this._y2):this._context.moveTo(this._x2,this._y2);break;case 3:this._point=4;default:Tc(this,t,n)}this._x0=this._x1,this._x1=this._x2,this._x2=t,this._y0=this._y1,this._y1=this._y2,this._y2=n}};var ow=function t(n){function e(t){return new kc(t,n)}return e.tension=function(n){return t(+n)},e}(0);Ac.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x0=this._x1=this._x2=this._y0=this._y1=this._y2=NaN, +this._l01_a=this._l12_a=this._l23_a=this._l01_2a=this._l12_2a=this._l23_2a=this._point=0},lineEnd:function(){switch(this._point){case 2:this._context.lineTo(this._x2,this._y2);break;case 3:this.point(this._x2,this._y2)}(this._line||0!==this._line&&1===this._point)&&this._context.closePath(),this._line=1-this._line},point:function(t,n){if(t=+t,n=+n,this._point){var e=this._x2-t,r=this._y2-n;this._l23_a=Math.sqrt(this._l23_2a=Math.pow(e*e+r*r,this._alpha))}switch(this._point){case 0:this._point=1,this._line?this._context.lineTo(t,n):this._context.moveTo(t,n);break;case 1:this._point=2;break;case 
2:this._point=3;default:Ec(this,t,n)}this._l01_a=this._l12_a,this._l12_a=this._l23_a,this._l01_2a=this._l12_2a,this._l12_2a=this._l23_2a,this._x0=this._x1,this._x1=this._x2,this._x2=t,this._y0=this._y1,this._y1=this._y2,this._y2=n}};var uw=function t(n){function e(t){return n?new Ac(t,n):new Sc(t,0)}return e.alpha=function(n){return t(+n)},e}(.5);Cc.prototype={areaStart:Qb,areaEnd:Qb,lineStart:function(){this._x0=this._x1=this._x2=this._x3=this._x4=this._x5=this._y0=this._y1=this._y2=this._y3=this._y4=this._y5=NaN,this._l01_a=this._l12_a=this._l23_a=this._l01_2a=this._l12_2a=this._l23_2a=this._point=0},lineEnd:function(){switch(this._point){case 1:this._context.moveTo(this._x3,this._y3),this._context.closePath();break;case 2:this._context.lineTo(this._x3,this._y3),this._context.closePath();break;case 3:this.point(this._x3,this._y3),this.point(this._x4,this._y4),this.point(this._x5,this._y5)}},point:function(t,n){if(t=+t,n=+n,this._point){var e=this._x2-t,r=this._y2-n;this._l23_a=Math.sqrt(this._l23_2a=Math.pow(e*e+r*r,this._alpha))}switch(this._point){case 0:this._point=1,this._x3=t,this._y3=n;break;case 1:this._point=2,this._context.moveTo(this._x4=t,this._y4=n);break;case 2:this._point=3,this._x5=t,this._y5=n;break;default:Ec(this,t,n)}this._l01_a=this._l12_a,this._l12_a=this._l23_a,this._l01_2a=this._l12_2a,this._l12_2a=this._l23_2a,this._x0=this._x1,this._x1=this._x2,this._x2=t,this._y0=this._y1,this._y1=this._y2,this._y2=n}};var aw=function t(n){function e(t){return n?new Cc(t,n):new Nc(t,0)}return e.alpha=function(n){return t(+n)},e}(.5);zc.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x0=this._x1=this._x2=this._y0=this._y1=this._y2=NaN,this._l01_a=this._l12_a=this._l23_a=this._l01_2a=this._l12_2a=this._l23_2a=this._point=0},lineEnd:function(){(this._line||0!==this._line&&3===this._point)&&this._context.closePath(),this._line=1-this._line},point:function(t,n){if(t=+t,n=+n,this._point){var 
e=this._x2-t,r=this._y2-n;this._l23_a=Math.sqrt(this._l23_2a=Math.pow(e*e+r*r,this._alpha))}switch(this._point){case 0:this._point=1;break;case 1:this._point=2;break;case 2:this._point=3,this._line?this._context.lineTo(this._x2,this._y2):this._context.moveTo(this._x2,this._y2);break;case 3:this._point=4;default:Ec(this,t,n)}this._l01_a=this._l12_a,this._l12_a=this._l23_a,this._l01_2a=this._l12_2a,this._l12_2a=this._l23_2a,this._x0=this._x1,this._x1=this._x2,this._x2=t,this._y0=this._y1,this._y1=this._y2,this._y2=n}};var cw=function t(n){function e(t){return n?new zc(t,n):new kc(t,0)}return e.alpha=function(n){return t(+n)},e}(.5);Pc.prototype={areaStart:Qb,areaEnd:Qb,lineStart:function(){this._point=0},lineEnd:function(){this._point&&this._context.closePath()},point:function(t,n){t=+t,n=+n,this._point?this._context.lineTo(t,n):(this._point=1,this._context.moveTo(t,n))}};var sw=function(t){return new Pc(t)};Dc.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x0=this._x1=this._y0=this._y1=this._t0=NaN,this._point=0},lineEnd:function(){switch(this._point){case 2:this._context.lineTo(this._x1,this._y1);break;case 3:Uc(this,this._t0,qc(this,this._t0))}(this._line||0!==this._line&&1===this._point)&&this._context.closePath(),this._line=1-this._line},point:function(t,n){var e=NaN;if(t=+t,n=+n,t!==this._x1||n!==this._y1){switch(this._point){case 0:this._point=1,this._line?this._context.lineTo(t,n):this._context.moveTo(t,n);break;case 1:this._point=2;break;case 
2:this._point=3,Uc(this,qc(this,e=Rc(this,t,n)),e);break;default:Uc(this,this._t0,e=Rc(this,t,n))}this._x0=this._x1,this._x1=t,this._y0=this._y1,this._y1=n,this._t0=e}}},(Oc.prototype=Object.create(Dc.prototype)).point=function(t,n){Dc.prototype.point.call(this,n,t)},Fc.prototype={moveTo:function(t,n){this._context.moveTo(n,t)},closePath:function(){this._context.closePath()},lineTo:function(t,n){this._context.lineTo(n,t)},bezierCurveTo:function(t,n,e,r,i,o){this._context.bezierCurveTo(n,t,r,e,o,i)}},Bc.prototype={areaStart:function(){this._line=0},areaEnd:function(){this._line=NaN},lineStart:function(){this._x=[],this._y=[]},lineEnd:function(){var t=this._x,n=this._y,e=t.length;if(e)if(this._line?this._context.lineTo(t[0],n[0]):this._context.moveTo(t[0],n[0]),2===e)this._context.lineTo(t[1],n[1]);else for(var r=jc(t),i=jc(n),o=0,u=1;u=0&&(this._t=1-this._t,this._line=1-this._line)},point:function(t,n){switch(t=+t,n=+n,this._point){case 0:this._point=1,this._line?this._context.lineTo(t,n):this._context.moveTo(t,n);break;case 1:this._point=2;default:if(this._t<=0)this._context.lineTo(this._x,n),this._context.lineTo(t,n);else{var e=this._x*(1-this._t)+t*this._t;this._context.lineTo(e,this._y),this._context.lineTo(e,n)}}this._x=t,this._y=n}};var lw=function(t){return new Hc(t,.5)},hw=Array.prototype.slice,pw=function(t,n){if((r=t.length)>1)for(var e,r,i=1,o=t[n[0]],u=o.length;i=0;)e[n]=n;return e},vw=function(){function t(t){var o,u,a=n.apply(this,arguments),c=t.length,s=a.length,f=new Array(s);for(o=0;o0){for(var e,r,i,o=0,u=t[0].length;o0){for(var e,r=0,i=t[n[0]],o=i.length;r0&&(r=(e=t[n[0]]).length)>0){for(var e,r,i,o=0,u=1;u=a)return null;var c=t-i.site[0],s=n-i.site[1],f=c*c+s*s;do{i=o.cells[r=u],u=null,i.halfedges.forEach(function(e){var r=o.edges[e],a=r.left;if(a!==i.site&&a||(a=r.right)){var c=t-a[0],s=n-a[1],l=c*c+s*s;le?(e+r)/2:Math.min(0,e)||Math.max(0,r),o>i?(i+o)/2:Math.min(0,i)||Math.max(0,o))}function o(t){return[(+t[0][0]+ +t[1][0])/2,(+t[0][1]+ 
+t[1][1])/2]}function u(t,n,e){t.on("start.zoom",function(){a(this,arguments).start()}).on("interrupt.zoom end.zoom",function(){a(this,arguments).end()}).tween("zoom",function(){var t=this,r=arguments,i=a(t,r),u=m.apply(t,r),c=e||o(u),s=Math.max(u[1][0]-u[0][0],u[1][1]-u[0][1]),f=t.__zoom,l="function"==typeof n?n.apply(t,r):n,h=k(f.invert(c).concat(s/f.k),l.invert(c).concat(s/l.k));return function(t){if(1===t)t=l;else{var n=h(t),e=s/n[2];t=new Es(e,c[0]-n[0]*e,c[1]-n[1]*e)}i.zoom(null,t)}})}function a(t,n){for(var e,r=0,i=E.length;r0?wl(this).transition().duration(N).call(u,f,a):wl(this).call(n.transform,f)}}function h(){if(y.apply(this,arguments)){var n,e,r,i,o=a(this,arguments),u=t.event.changedTouches,c=u.length;for(Cs(),e=0;e +#include // exit +#include +#include +#include // getpid gethostname +#include // gettimeofday + +#include "basetypes.h" +#include "dclab_log.h" +#include "dclab_rpc.h" +//#include "dclab_trace_lib.h" +#include "polynomial.h" + + +// +// Utility routines +// + +// Return floor of log base2 of x, i.e. the number of bits-1 needed to hold x +int32 FloorLg(int32 x) { + int32 lg = 0; + int32 local_x = x; + if (local_x & 0xffff0000) {lg += 16; local_x >>= 16;} + if (local_x & 0xff00) {lg += 8; local_x >>= 8;} + if (local_x & 0xf0) {lg += 4; local_x >>= 4;} + if (local_x & 0xc) {lg += 2; local_x >>= 2;} + if (local_x & 0x2) {lg += 1; local_x >>= 1;} + return lg; +} + +// Put together an IPv4 address from four separate ints +uint32 MakeIP(int a, int b, int c, int d) {return (a << 24) | (b << 16) | (c << 8) | d;} + +// Pad a string out to length using pseudo-random characters. +// x is a pseudo-random seed and is updated by this routine +// s is the input character string to be padded and must be allocated big enough +// to hold at least length characters +// curlen is the current length of s, bytes to be retained +// padded_len is the desired new character length +// If curlen >= padded_len, s is returned unchanged. Else it is padded. 
+// Returns s in both cases. +// DOES NOT return a proper c string with trailing zero byte +char* PadToSimple(uint32* randseed, char* s, int curlen, int padded_len) { + char* p = s + curlen; // First byte off the end; + for (int i = 0; i < (padded_len - curlen); ++i) { + if ((i % 5) == 0) { + *p++ = '_'; + } else { + *p++ = "abcdefghijklmnopqrstuvwxyz012345"[*randseed & 0x1f]; + *randseed = POLYSHIFT32(*randseed); + } + } + return s; +} + + +char* PadTo(uint32* randseed, char* s, int baselen, int padded_len) { + if (baselen >= padded_len) {return s;} + + // Go faster for long strings by just padding out to 256 then copying + if (padded_len > 256) { + PadToSimple(randseed, s, baselen, 256); + for (int i = 256; i <= padded_len - 256; i += 256) { + memcpy(&s[i], s, 256); + } + memcpy(&s[(padded_len >> 8) << 8], s, padded_len & 255); + return s; + } + + PadToSimple(randseed, s, baselen, padded_len); + return s; +} + +// String form, updates randseed and str +void PadToStr(uint32* randseed, int padded_len, string* str) { + int32 baselen = str->size(); + if (baselen >= padded_len) {return;} + str->resize(padded_len); + char* str_ptr = const_cast(str->data()); + +#if 1 + // Go faster for long strings by just padding out to 256 then copying + if (padded_len > 256) { + PadToSimple(randseed, str_ptr, baselen, 256); + for (int i = 256; i <= padded_len - 256; i += 256) { + memcpy(&str_ptr[i], str_ptr, 256); + } + memcpy(&str_ptr[(padded_len >> 8) << 8], str_ptr, padded_len & 255); + return; + } +#endif + + PadToSimple(randseed, str_ptr, baselen, padded_len); +} + +// +// Formatting for printing +// + +// These all use a single static buffer. In real production code, these would +// all be std::string values, or something else at least as safe. 
+static const int kMaxDateTimeBuffer = 32; +static char gTempDateTimeBuffer[kMaxDateTimeBuffer]; + +static const int kMaxPrintBuffer = 256; +static char gTempPrintBuffer[kMaxPrintBuffer]; + + +// Turn seconds since the epoch into yyyymmdd_hhmmss +// Not valid after January 19, 2038 +const char* FormatSecondsDateTime(int32 sec) { + // if (sec == 0) {return "unknown";} // Longer spelling: caller expecting date + time_t tt = sec; + struct tm* t = localtime(&tt); + sprintf(gTempDateTimeBuffer, "%04d%02d%02d_%02d%02d%02d", + t->tm_year + 1900, t->tm_mon + 1, t->tm_mday, + t->tm_hour, t->tm_min, t->tm_sec); + return gTempDateTimeBuffer; +} + +// Turn seconds since the epoch into hhmmss (no date) +// Not valid after January 19, 2038 +const char* FormatSecondsTime(int32 sec) { + // if (sec == 0) {return "unk";} // Shorter spelling: caller expecting no date + time_t tt = sec; + struct tm* t = localtime(&tt); + sprintf(gTempDateTimeBuffer, "%02d%02d%02d", + t->tm_hour, t->tm_min, t->tm_sec); + return gTempDateTimeBuffer; +} + +// Turn usec since the epoch into yyyymmdd_hhmmss.usec +const char* FormatUsecDateTime(int64 us) { + // if (us == 0) {return "unknown";} // Longer spelling: caller expecting date + int32 seconds = us / 1000000; + int32 usec = us - (seconds * 1000000); + snprintf(gTempPrintBuffer, kMaxPrintBuffer, "%s.%06d", + FormatSecondsDateTime(seconds), usec); + return gTempPrintBuffer; +} + +// Turn usec since the epoch into ss.usec (no date) +// Note: initial 3d needed for sort of JSON file to but times in order +const char* FormatUsecTime(int64 us) { + // if (us == 0) {return "unk";} + int32 seconds = us / 1000000; + int32 usec = us - (seconds * 1000000); + snprintf(gTempPrintBuffer, kMaxPrintBuffer, "%3d.%06d", seconds, usec); + return gTempPrintBuffer; +} + +// TODO: map into a human-meaningful name +const char* FormatIpPort(uint32 ip, uint16 port) { + if (ip == 0) {return "unk:unk";} + snprintf(gTempPrintBuffer, kMaxPrintBuffer, "%d.%d.%d.%d:%d", + (ip >> 24) 
& 0xff, (ip >> 16) & 0xff, + (ip >> 8) & 0xff, (ip >> 0) & 0xff, port); + return gTempPrintBuffer; +} + +// TODO: map into a human-meaningful name +const char* FormatIp(uint32 ip) { + if (ip == 0) {return "unk:unk";} + snprintf(gTempPrintBuffer, kMaxPrintBuffer, "%d.%d.%d.%d", + (ip >> 24) & 0xff, (ip >> 16) & 0xff, + (ip >> 8) & 0xff, (ip >> 0) & 0xff); + return gTempPrintBuffer; +} + +// Turn RPC type enum into a meaningful name +const char* FormatType(uint32 type) { + return kRPCTypeName[type]; +} + +// TenLg length +const char* FormatLglen(uint8 len) { + snprintf(gTempPrintBuffer, kMaxPrintBuffer, "%d.%d", len / 10, len % 10); + return gTempPrintBuffer; +} + +// Just an rpcid as hex +const char* FormatRPCID(uint32 rpcid) { + snprintf(gTempPrintBuffer, kMaxPrintBuffer, "%08x", rpcid); + return gTempPrintBuffer; +} + +// Just an rpcid as decimal +const char* FormatRPCIDint(uint32 rpcid) { + snprintf(gTempPrintBuffer, kMaxPrintBuffer, "%u", rpcid); + return gTempPrintBuffer; +} + +// Method as C string with trailing '\0' +const char* FormatMethod(const char* method) { + if (method[0] == '\0') {return "unknown";} + memcpy(gTempPrintBuffer, method, 8); + gTempPrintBuffer[8] = '\0'; + return gTempPrintBuffer; +} + +// Turn status into meaningful name or leave as number +const char* FormatStatus(uint32 status) { + if (status < NumStatus) {return kRPCStatusName[status];} + // Unknown status values + snprintf(gTempPrintBuffer, kMaxPrintBuffer, "ERROR_%d", status); + return gTempPrintBuffer; +} + +// Just show length in decimal +const char* FormatLength(uint32 length) { + snprintf(gTempPrintBuffer, kMaxPrintBuffer, "%d", length); + return gTempPrintBuffer; +} + +// Turn fixed-field-width data into C string with trailing '\0' +// We expect a delimited string with 4-byte length on front +// We only do the first of possibly two strings +const char* FormatData(const uint8* data, int fixed_width) { + int trunclen = (fixed_width >= kMaxLogDataSize) ? 
kMaxLogDataSize : fixed_width; + for (int i = 0; i < trunclen; ++i) { + uint8 c = data[i]; + if (c <= ' ') {c = '.';} // Turn any bytes of delimited length into dots + gTempPrintBuffer[i] = c; + } + gTempPrintBuffer[trunclen] = '\0'; + +#if 1 + // Suppress trailing spaces + for (int i = trunclen - 1; i >= 0; --i) { + if (gTempPrintBuffer[i] == ' ') { + gTempPrintBuffer[i] = '\0'; + } else { + break; + } + } +#endif + return gTempPrintBuffer; +} + + + + + +// In a production environment, use std::string or something safer +static char tempLogFileName[256]; + +// Construct a name for opening a log file, passing in name of program from command line +// name is program_time_host_pid +const char* MakeLogFileName(const char* argv0) { + time_t tt; + const char* timestr; + char hostnamestr[256]; + int pid; + + const char* slash = strrchr(argv0, '/'); + // Point to first char of image name + if (slash == NULL) { + slash = argv0; + } else { + slash = slash + 1; // over the slash + } + + tt = time(NULL); + timestr = FormatSecondsDateTime(tt); + gethostname(hostnamestr, 256) ; + hostnamestr[255] = '\0'; + pid = getpid(); + + sprintf(tempLogFileName, "%s_%s_%s_%d.log", + slash, timestr, hostnamestr, pid); + return tempLogFileName; +} + +// Open logfile for writing. Exit program on any error +// Returns the open file. 
+FILE* OpenLogFileOrDie(const char* fname) { + FILE* logfile = fopen(fname, "wb"); + if (logfile == NULL) { + fprintf(stderr, "%s did not open\n", fname); + exit(0); + } + return logfile; +} + + + + +// +// Printing log records +// + + + +// Convenience routine for header of printed log records +void PrintLogRecordHeader(FILE* f) { + fprintf(f, + "REQ_send_time REQ_rcv_time RESP_send_time RESP_rcv_time " + "CLIENT_ip:port SERVER_ip:port " + "RPCID PARENT " + "L1 L2 TYPE " + "METHOD STATUS " + "LEN DATA " + "\n"); +} + + +// Print one binary log record to file f +void PrintLogRecord(FILE* f, const BinaryLogRecord* lr) { + fprintf(f, "%s ", FormatUsecDateTime(lr->req_send_timestamp)); + fprintf(f, "%s ", FormatUsecTime(lr->req_rcv_timestamp)); + fprintf(f, "%s ", FormatUsecTime(lr->resp_send_timestamp)); + fprintf(f, "%s ", FormatUsecTime(lr->resp_rcv_timestamp)); + + fprintf(f, "%s ", FormatIpPort(lr->client_ip, lr->client_port)); + fprintf(f, "%s ", FormatIpPort(lr->server_ip, lr->server_port)); + + fprintf(f, "%s ", FormatLglen(lr->lglen1)); + fprintf(f, "%s ", FormatLglen(lr->lglen2)); + fprintf(f, "%s ", FormatRPCID(lr->rpcid)); + fprintf(f, "%s ", FormatRPCID(lr->parent)); + + fprintf(f, "%s ", FormatType(lr->type)); + fprintf(f, "%s ", FormatMethod(lr->method)); + fprintf(f, "%s ", FormatStatus(lr->status)); + + fprintf(f, "%s ", FormatLength(lr->datalength)); + fprintf(f, "%s ", FormatData(lr->data, kMaxLogDataSize)); + fprintf(f, "\n"); +} + +// Print one binary log record to file f +void PrintLogRecordAsJson(FILE* f, const BinaryLogRecord* lr, uint64 basetime_usec) { + fprintf(f, "["); + fprintf(f, "%s, ", FormatUsecTime(lr->req_send_timestamp - basetime_usec)); + fprintf(f, "%s, ", FormatUsecTime(lr->req_rcv_timestamp - basetime_usec)); + fprintf(f, "%s, ", FormatUsecTime(lr->resp_send_timestamp - basetime_usec)); + fprintf(f, "%s, ", FormatUsecTime(lr->resp_rcv_timestamp - basetime_usec)); + + //fprintf(f, "\"%s\", ", FormatIpPort(lr->client_ip, 
lr->client_port)); + //fprintf(f, "\"%s\", ", FormatIpPort(lr->server_ip, lr->server_port)); + fprintf(f, "\"%s\", ", FormatIp(lr->client_ip)); + fprintf(f, "\"%s\", ", FormatIp(lr->server_ip)); + + fprintf(f, "%s, ", FormatLglen(lr->lglen1)); + fprintf(f, "%s, ", FormatLglen(lr->lglen2)); + fprintf(f, "%s, ", FormatRPCIDint(lr->rpcid)); + fprintf(f, "%s, ", FormatRPCIDint(lr->parent)); + + fprintf(f, "\"%s\", ", FormatType(lr->type)); + fprintf(f, "\"%s\", ", FormatMethod(lr->method)); + fprintf(f, "\"%s\", ", FormatStatus(lr->status)); + + fprintf(f, "%s, ", FormatLength(lr->datalength)); + fprintf(f, "\"%s\"", FormatData(lr->data, kMaxLogDataSize)); + fprintf(f, "],\n"); +} + +void PrintRPC(FILE* f, const RPC* rpc) { + // Header + const RPCHeader* hdr = rpc->header; + fprintf(f, "%s ", FormatUsecDateTime(hdr->req_send_timestamp)); + fprintf(f, "%s ", FormatUsecTime(hdr->req_rcv_timestamp)); + fprintf(f, "%s ", FormatUsecTime(hdr->resp_send_timestamp)); + fprintf(f, "%s ", FormatUsecTime(hdr->resp_rcv_timestamp)); + + fprintf(f, "%s ", FormatIpPort(hdr->client_ip, hdr->client_port)); + fprintf(f, "%s ", FormatIpPort(hdr->server_ip, hdr->server_port)); + + fprintf(f, "%s ", FormatRPCID(hdr->rpcid)); + fprintf(f, "%s ", FormatRPCID(hdr->parent)); + + fprintf(f, "%s ", FormatLglen(hdr->lglen1)); + fprintf(f, "%s ", FormatLglen(hdr->lglen2)); + fprintf(f, "%s ", FormatType(hdr->type)); + + fprintf(f, "%s ", FormatMethod(hdr->method)); + fprintf(f, "%s ", FormatStatus(hdr->status)); + fprintf(f, "%s ", FormatLength(rpc->datalen)); + fprintf(f, "%s ", FormatData(rpc->data, rpc->datalen)); + + fprintf(f, "\n"); +} + +void RPCToLogRecord(const RPC* rpc, BinaryLogRecord* lr) { + const RPCHeader* hdr = rpc->header; + lr->rpcid = hdr->rpcid; + lr->parent = hdr->parent; + lr->req_send_timestamp = hdr->req_send_timestamp; + lr->req_rcv_timestamp = hdr->req_rcv_timestamp; + lr->resp_send_timestamp = hdr->resp_send_timestamp; + lr->resp_rcv_timestamp = hdr->resp_rcv_timestamp; 
+ + lr->client_ip = hdr->client_ip; + lr->client_port = hdr->client_port; + lr->server_ip = hdr->server_ip; + lr->server_port = hdr->server_port; + + lr->lglen1 = hdr->lglen1; + lr->lglen2 = hdr->lglen2; + lr->type = hdr->type; + + memcpy(lr->method, hdr->method, 8); + lr->status = hdr->status; + lr->datalength = rpc->datalen; + if (rpc->datalen >= kMaxLogDataSize) { + memcpy(lr->data, rpc->data, kMaxLogDataSize); + } else { + memset(lr->data, 0, kMaxLogDataSize); + memcpy(lr->data, rpc->data, rpc->datalen); + } +} + +void LogRPC(FILE* logfile, const RPC* rpc) { + BinaryLogRecord lr; + RPCToLogRecord(rpc, &lr); + fwrite(&lr, 1, sizeof(BinaryLogRecord), logfile); +} + + +// Print error message to stderr from system errno and terminate +void Error(const char* msg) { + perror(msg); + exit(EXIT_FAILURE); +} + +// Print error message to stderr from supplied errornum and terminate +void Error(const char* msg, int errornum) { + fprintf(stderr, "%s: %s\n", msg, strerror(errornum)); + exit(EXIT_FAILURE); +} + +// Print error message to stderr from supplied msg2 and terminate +void Error(const char* msg, const char* msg2) { + fprintf(stderr, "%s: %s\n", msg, msg2); + exit(EXIT_FAILURE); +} + +// Print error message to stderr from system errno and return +void ErrorNoFail(const char* msg) { + perror(msg); +} + + diff --git a/book-user-code/dclab_log.h b/book-user-code/dclab_log.h new file mode 100644 index 000000000000..c2bef16b6706 --- /dev/null +++ b/book-user-code/dclab_log.h @@ -0,0 +1,107 @@ +// Simple binary log file format +// This defines a 96-byte binary log record and routines to manipulate it. +// Our dclab client-server routines will use this to log all their activity +// +// Included are routines to create log file names and to print binary log records as ASCII. +// +// Copyright 2021 Richard L. 
Sites + +#ifndef __DCLAB_LOG_H__ +#define __DCLAB_LOG_H__ + +#include + +# include + +#include "basetypes.h" +#include "dclab_rpc.h" + +using std::string; + +static const int kMaxLogDataSize = 24; + +typedef struct { + uint32 rpcid; + uint32 parent; + int64 req_send_timestamp; // usec since the epoch, client clock + int64 req_rcv_timestamp; // usec since the epoch, server clock + int64 resp_send_timestamp; // usec since the epoch, server clock + int64 resp_rcv_timestamp; // usec since the epoch, client clock + // 40 bytes + + uint32 client_ip; + uint32 server_ip; + uint16 client_port; + uint16 server_port; + uint8 lglen1; // 10 * lg(request data length in bytes) + uint8 lglen2; // 10 * lg(response data length in bytes) + uint16 type; // An RPCType + // 16 bytes + + char method[8]; + // 64 bytes + + uint32 status; // 0 = success, other = error code + uint32 datalength; // full length transmitted + // 72 bytes to here + + uint8 data[kMaxLogDataSize]; // truncated, zero filled + // 96 bytes +} BinaryLogRecord; + + +// Utility routines + +// Return floor of log base2 of x, i.e. the number of bits needed to hold x +int32 FloorLg(int32 x); + +// Put together an IPv4 address from four separate ints +uint32 MakeIP(int a, int b, int c, int d); + +// Turn IPv4:port into a printable string +const char* FormatIpPort(uint32 ip, uint16 port); + + +// Pad a string out to length using pseudo-random characters. +// x is a pseduo-random seed and is updated by this routine +// s is the input character string to be padded and must be allocated big enough +// to hold at least length characters +// curlen is the current length of s, bytes to be retained +// padded_len is the desired new character length +// If curlen >= padded_len, s is returned unchanged. Else it is padded. +// Returns s in both cases. 
+char* PadTo(uint32* x, char* s, int curlen, int padded_len); + +// String form, updates randseed and str +void PadToStr(uint32* randseed, int padded_len, string* str); + + +// Construct a name for opening a log file, passing in name of program from command line +// Returns the resulting name, which is of the form program_time_host_pid +const char* MakeLogFileName(const char* argv0); + +// Open logfile for writing. Exit program on any error +// Returns the open file. +FILE* OpenLogFileOrDie(const char* fname); + + +// Convenience routine for header of printed log records +void PrintLogRecordHeader(FILE* f); + +// Print one binary log record to file f +void PrintLogRecord(FILE* f, const BinaryLogRecord* lr); +void PrintLogRecordAsJson(FILE* f, const BinaryLogRecord* lr, uint64 basetime_usec); +void PrintRPC(FILE* f, const RPC* rpc); +void RPCToLogRecord(const RPC* rpc, BinaryLogRecord* lr); +void LogRPC(FILE* logfile, const RPC* rpc); + +// Print error messages to stderr +void Error(const char* msg); +void Error(const char* msg, int errornum); +void Error(const char* msg, const char* msg2); +void ErrorNoFail(const char* msg); + +#endif // __DCLAB_LOG_H__ + + + diff --git a/book-user-code/dclab_rpc.cc b/book-user-code/dclab_rpc.cc new file mode 100644 index 000000000000..d400d54ff5c3 --- /dev/null +++ b/book-user-code/dclab_rpc.cc @@ -0,0 +1,380 @@ +// dclab_rpc.cc +// Copyright 2021 Richard L. 
Sites + +#include +#include +#include + +#include + +#include "basetypes.h" +#include "dclab_log.h" +#include "dclab_rpc.h" +#include "kutrace_lib.h" + +using std::string; + +// 10 * lg(x) rounded to nearest integer, with lg(zero) mapped to 0 +static const uint8 kTenLgTable[256] = { + 0, 0, 10, 16, 20, 23, 26, 28, 30, 32, 33, 35, 36, 37, 38, 39, +40, 41, 42, 42, 43, 44, 45, 45, 46, 46, 47, 48, 48, 49, 49, 50, +50, 50, 51, 51, 52, 52, 52, 53, 53, 54, 54, 54, 55, 55, 55, 56, +56, 56, 56, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, 60, 60, +60, 60, 60, 61, 61, 61, 61, 61, 62, 62, 62, 62, 62, 63, 63, 63, +63, 63, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 66, 66, +66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 68, 68, 68, 68, +68, 68, 68, 68, 69, 69, 69, 69, 69, 69, 69, 69, 70, 70, 70, 70, +70, 70, 70, 70, 70, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 72, +72, 72, 72, 72, 72, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73, +73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 74, 75, +75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 76, 76, 76, 76, +76, 76, 76, 76, 76, 76, 76, 76, 76, 77, 77, 77, 77, 77, 77, 77, +77, 77, 77, 77, 77, 77, 77, 77, 78, 78, 78, 78, 78, 78, 78, 78, +78, 78, 78, 78, 78, 78, 78, 79, 79, 79, 79, 79, 79, 79, 79, 79, +79, 79, 79, 79, 79, 79, 79, 79, 80, 80, 80, 80, 80, 80, 80, 80, +}; + +// 2**0.0 through 2** 0.9 +static const double kPowerTwoTenths[10] = { + 1.0000, 1.0718, 1.1487, 1.2311, 1.3195, + 1.4142, 1.5157, 1.6245, 1.7411, 1.8661 +}; + +// Make the 16-byte marker for an RPC on the wire +void MakeRPCMarker(const RPC* rpc, RPCMarker* marker) { + marker->signature = kMarkerSignature; + marker->headerlen = rpc->headerlen; + marker->datalen = rpc->datalen; + marker->checksum = MarkerChecksum(marker); +} + +// +// The main transmission routines +// + +// For len = 0, returns true without doing a read and without using the buffer pointer +bool ReadExactlyLenBytes(int sockfd, uint8* buffer, int32 len) { + uint8* nextbyte = buffer; + int32 
neededbytes = len; + while (neededbytes > 0) { + // Socket data may come in pieces of a few bytes each + // n = -1 indicates an error. n = 0 indicates closed socket ?? + int n = read(sockfd, nextbyte, neededbytes); + // if (n == 0) {ErrorNoFail("ReadExactly got 0 bytes"); return false;} + if (n == 0) {return false;} // Zero bytes is normal at end + if (n <= 0) {Error("ReadExactly error"); return false;} + nextbyte += n; + neededbytes -= n; + } + return true; +} + +// Read next incoming RPC request/response. +// Will block until all bytes arrive or read fails (e.g. connection drops) +// +// If successful, +// sets header to point to allocated buffer and sets headerlen to match, +// sets data to point to another allocated buffer and sets datalen to match. +// Caller must do delete[] on both. +// +// Returns true if a good request is found, false on error of any kind. +// Later: if synchronizing on the marker is a problem, try to recover. +// +// Also calculate the same xor hash over the first 32 bytes of the message that +// the kernel does, for tracking message first packet vs. receipt here in user code +bool ReadOneRPC(int sockfd, RPC* rpc, uint32* hash32) { + rpc->header = NULL; + rpc->headerlen = 0; + rpc->data = NULL; + rpc->datalen = 0; + + // Read the RPC marker + RPCMarker marker; + uint8* markerbuffer = reinterpret_cast(&marker); + bool ok = true; + if (hash32 != NULL) {*hash32 = 0;} + + ok &= ReadExactlyLenBytes(sockfd, markerbuffer, sizeof(RPCMarker)); + + // If we read zero bytes because no command arrived, ok will be set false. 
+ // Client likely closed the socket, so we bail + if (!ok) {return ok;} + + // We now have bytes for a complete marker + if (!ValidMarker(&marker)) {ErrorBadMarker(&marker);} + + // Read the RPCheader + if (marker.headerlen > 0) { + uint8* hdr = new uint8[marker.headerlen]; + rpc->header = reinterpret_cast(hdr); + rpc->headerlen = marker.headerlen; + ok &= ReadExactlyLenBytes(sockfd, hdr, rpc->headerlen); + } + + // We now have a complete valid marker; gather the rest of the RPC bytes + uint32 packet_hash = 0; + for (int i = 0; i < 4; ++i) {packet_hash ^= (reinterpret_cast(&marker))[i];} + for (int i = 0; i < 4; ++i) {packet_hash ^= (reinterpret_cast(rpc->header))[i];} + if (hash32 != NULL) {*hash32 = packet_hash;} + + // 2021 Add user-mode receipt with RPCID and 16-bit message first packet hash to trace + // RPCID in to 16 bits of arg, hash16 in bottom + // No, back to full 32-bit hash + ////uint32 rpcid16 = rpcid32_to_rpcid16(rpc->header->rpcid); + ////uint32 hash16 = hash32_to_hash16(packet_hash); + ////kutrace::addevent(KUTRACE_RX_USER, (rpcid16 << 16) | hash16); + kutrace::addevent(KUTRACE_RX_USER, packet_hash); + + // Read the data + if (marker.datalen > 0) { + rpc->data = new uint8[marker.datalen]; + rpc->datalen = marker.datalen; + ok &= ReadExactlyLenBytes(sockfd, rpc->data, rpc->datalen); + } + + return ok; +} + +// Send one RPC over the wire: marker, header, data +bool SendOneRPC(int sockfd, const RPC* rpc, uint32* hash32) { + int iret; + RPCMarker mymarker; + MakeRPCMarker(rpc, &mymarker); + + uint32 packet_hash = 0; + for (int i = 0; i < 4; ++i) {packet_hash ^= (reinterpret_cast(&mymarker))[i];} + for (int i = 0; i < 4; ++i) {packet_hash ^= (reinterpret_cast(rpc->header))[i];} + if (hash32 != NULL) {*hash32 = packet_hash;} + + // 2021 Add user-mode send with RPCID and 16-bit message first packet hash to trace + // RPCID in to 16 bits of arg, hash16 in bottom + // No, back to full 32-bit hash + ////uint32 rpcid16 = 
rpcid32_to_rpcid16(rpc->header->rpcid); + ////uint32 hash16 = hash32_to_hash16(packet_hash); + ////kutrace::addevent(KUTRACE_TX_USER, (rpcid16 << 16) | hash16); + kutrace::addevent(KUTRACE_TX_USER, packet_hash); + +#if 1 + // Make a single message to transmit + string msg; + msg.reserve(sizeof(RPCMarker) + rpc->headerlen + rpc->datalen); + msg.append(reinterpret_cast(&mymarker), sizeof(RPCMarker)); + msg.append(reinterpret_cast(rpc->header), rpc->headerlen); + msg.append(reinterpret_cast(rpc->data), rpc->datalen); + iret = write(sockfd, msg.data(), msg.length()); + if (iret < 0) {Error("write message");} + return true; + +#else + + iret = write(sockfd, &mymarker, sizeof(RPCMarker)); + if (iret < 0) {Error("write marker");} + + if (rpc->headerlen > 0) { + iret= write(sockfd, rpc->header, rpc->headerlen); + if (iret < 0) {Error("write header");} + } + if (rpc->datalen > 0) { + iret = write(sockfd, rpc->data, rpc->datalen); + if (iret < 0) {Error("write data");} + } +#endif + + return true; +} + + +// +// Some utility routines +// + +uint32 MarkerChecksum(const RPCMarker* marker) { + return marker->signature + ((marker->headerlen << 20) ^ (marker->datalen)); +} + +// Client and server both deal in little-endian byte streams, so no ntoh* needed +bool ValidMarker(const RPCMarker* marker) { + if (marker->signature != kMarkerSignature ) {return false;} + if (marker->headerlen > kMaxRPCHeaderLength) {return false;} + if (marker->datalen > kMaxRPCDataLength) {return false;} + if (marker->checksum != MarkerChecksum(marker)) {return false;} + return true; +} + +void ErrorBadMarker(const RPCMarker* marker) { + const uint8* umarker = reinterpret_cast(marker); + fprintf(stderr, "Invalid marker received: "); + // Print what we got to make it possible to understand what went wrong + for (int i = 0; i < 16; ++i) { + fprintf(stderr, "%02x", umarker[i]); + if ((i & 3) == 3) {fprintf(stderr, " ");} + } + fprintf(stderr, "\n"); + exit(EXIT_FAILURE); +} + + +// Convert uint32 to 
single-byte 10 * lg(x) +uint8 TenLg(uint32 x) { + if (x == 0) {return 0;} + if (x >= 47453132) {return 255;} + int32 floorlg = FloorLg(x); // returns 7 for 255, 8 for 256 + uint8 tenlg = 0; + uint32 local_x = x; + if (floorlg > 7) { + local_x >>= (floorlg - 7); + tenlg += (floorlg - 7) * 10; + } + tenlg += kTenLgTable[local_x]; +//fprintf(stderr, "TenLg %d = %d %d %d %d\n", x, floorlg, local_x, kTenLgTable[local_x],tenlg); + return tenlg; +} + +// Convert ten * lg(x) back into x +uint64 TenPow(uint8 xlg) { + int powertwo = xlg / 10; + int fraction = xlg % 10; + uint64 retval = 1llu << powertwo; + retval = (retval * kPowerTwoTenths[fraction]) + 0.5; + return retval; +} + + +// Copy an RPC, copying sub-pieces +void CopyRPC(const RPC* srcrpc, RPC* dstrpc) { + dstrpc->header = new RPCHeader; + dstrpc->headerlen = sizeof(RPCHeader); + memcpy(dstrpc->header, srcrpc->header, sizeof(RPCHeader)); + dstrpc->data = new uint8[srcrpc->datalen]; + dstrpc->datalen = srcrpc->datalen; + memcpy(dstrpc->data, srcrpc->data, srcrpc->datalen); +} + +// Copy an RPC, copying header sub-piece, leaving dst data unchanged +void CopyRPCHeader(const RPC* srcrpc, RPC* dstrpc) { + dstrpc->header = new RPCHeader; + dstrpc->headerlen = sizeof(RPCHeader); + memcpy(dstrpc->header, srcrpc->header, sizeof(RPCHeader)); +} + +// Copy an RPC, copying data sub-piece, leaving dst header unchanged +void CopyRPCData(const RPC* srcrpc, RPC* dstrpc) { + dstrpc->data = new uint8[srcrpc->datalen]; + dstrpc->datalen = srcrpc->datalen; + memcpy(dstrpc->data, srcrpc->data, srcrpc->datalen); +} + +// Free the header and data previously allocated +bool FreeRPC(RPC* rpc) { + delete rpc->header; + rpc->header = NULL; + rpc->headerlen = 0; + delete[] rpc->data; + rpc->data = NULL; + rpc->datalen = 0; + return true; +} + +// Free just the data previously allocated +bool FreeRPCDataOnly(RPC* rpc) { + rpc->header = NULL; + rpc->headerlen = 0; + delete[] rpc->data; + rpc->data = NULL; + rpc->datalen = 0; + return true; +} + 
+ +// Our simple delimited strings on the wire have a 4-byte length on the front +// We completely ignore endianness issues here +// Extract a delimited string from RPC data: length, string +// arg points to a uint32 N followed by N bytes +// Return the N bytes as a string and update arg to point to the following byte +string GetStringArg(const uint8** arg) { + uint32 len = *reinterpret_cast(*arg); + *arg += sizeof(uint32); + const char* s = reinterpret_cast(*arg); + *arg += len; + return string(s, len); +} + +// Insert a delimited buffer into RPC data: length, string +void PutStringRPC(const char* str, int strlen, RPC* rpc) { + uint32 len = strlen; + rpc->datalen = sizeof(uint32) + len; + rpc->data = new uint8[rpc->datalen]; + uint8* d = rpc->data; + + // Put in length, then string + *reinterpret_cast(d) = len; + d += sizeof(uint32); + memcpy(reinterpret_cast(d), str, len); +} + +// Insert two delimited buffers into RPC data: length, string, length, string +void PutStringRPC2(const char* str1, int str1len, const char* str2, int str2len, RPC* rpc) { + uint32 len1 = str1len; + uint32 len2 = str2len; + rpc->datalen = 2 * sizeof(uint32) + len1 + len2; + rpc->data = new uint8[rpc->datalen]; + uint8* d = rpc->data; + + // Put in length, then str1 + *reinterpret_cast(d) = len1; + d += sizeof(uint32); + memcpy(reinterpret_cast(d), str1, len1); + d += len1; + + // Put in length, then str2 + *reinterpret_cast(d) = len2; // May well be unaligned + d += sizeof(uint32); + memcpy(reinterpret_cast(d), str2, len2); +} + + +// Insert a delimited string into RPC data: length, string +void PutStringRPC(const string& str, RPC* rpc) { + uint32 len = str.size(); + rpc->datalen = sizeof(uint32) + len; + rpc->data = new uint8[rpc->datalen]; + uint8* d = rpc->data; + + // Put in length, then string + *reinterpret_cast(d) = len; + d += sizeof(uint32); + memcpy(reinterpret_cast(d), str.data(), len); +} + +// Insert two delimited strings into RPC data: length, string, length, string +void 
PutStringRPC2(const string& str1, const string& str2, RPC* rpc) { + uint32 len1 = str1.size(); + uint32 len2 = str2.size(); + rpc->datalen = 2 * sizeof(uint32) + len1 + len2; + rpc->data = new uint8[rpc->datalen]; + uint8* d = rpc->data; + + // Put in length, then str1 + *reinterpret_cast(d) = len1; + d += sizeof(uint32); + memcpy(reinterpret_cast(d), str1.data(), len1); + d += len1; + + // Put in length, then str2 + *reinterpret_cast(d) = len2; // May well be unaligned + d += sizeof(uint32); + memcpy(reinterpret_cast(d), str2.data(), len2); +} + + + + + + + + + + + + + diff --git a/book-user-code/dclab_rpc.h b/book-user-code/dclab_rpc.h new file mode 100644 index 000000000000..278162b5329b --- /dev/null +++ b/book-user-code/dclab_rpc.h @@ -0,0 +1,174 @@ +// Simple RPC header dclab_rpc.h +// This defines an RPC header and the routines to manipulate it. +// Our dclab client-server routines will use this for all RPC messages +// +// Copyright 2021 Richard L. Sites + + +#ifndef __DCLAB_RPC_H__ +#define __DCLAB_RPC_H__ + +#include +#include +#include "basetypes.h" + +using std::string; + +static const uint32 kMarkerSignature = 3519354853u; // Arbitrary unlikely constant +static const int kMaxRPCHeaderLength = (4 * 1024) - 1; +static const int kMaxRPCDataLength = (16 * 1024 * 1024) - 1; + +enum RPCType { + ReqSendType = 0, + ReqRcvType, + RespSendType, + RespRcvType, + TextType, + NumType // must be last +}; + +enum RPCStatus { + SuccessStatus = 0, + FailStatus, + TooBusyStatus, + NumStatus // must be last +}; + +// Padded to 8 characters for printing +static const char* const kRPCTypeName[] = { + "ReqSend ", "ReqRcv ", "RespSend", "RespRcv ", "Text " +}; + +// Padded to 8 characters for printing +static const char* const kRPCStatusName[] = { + "Success ", "Fail ", "TooBusy ", +}; + +// Struct transmitted on the wire +// We completely ignore endianness issues here +typedef struct { + // Marker for our RPC messages. 
One message may be bigger than one packet + // All our messages are aligned multiples of four bytes, so scanning for + // a marker only has to look at aligned words. + // The marker is designed for quick detection/rejection in any packet -- + // First word is unlikely bit pattern so non-marker quickly fails detection + // Second word has 20 high-order zeros in marker, low 12 bits nonzero + // Third word has 12 high-order zeros in marker + // Fourth word is simple checksum of previous three and again is unlikely bit pattern + uint32 signature; // Always kMarkerSignature + uint32 headerlen; + uint32 datalen; + uint32 checksum; // = signature + ((headerlen << 20) ^ datalen) + // 16 bytes +} RPCMarker; + +// Struct transmitted on the wire +// We completely ignore endianness issues here +typedef struct { + // rpcid is at the front so that kernel TCP Patches can find it easily + uint32 rpcid; + uint32 parent; + + int64 req_send_timestamp; // usec since the epoch, client clock + int64 req_rcv_timestamp; // usec since the epoch, server clock + int64 resp_send_timestamp; // usec since the epoch, server clock + int64 resp_rcv_timestamp; // usec since the epoch, client clock + // 40 bytes + + uint32 client_ip; + uint32 server_ip; + uint16 client_port; + uint16 server_port; + uint8 lglen1; // 10 * lg(request data length in bytes) + uint8 lglen2; // 10 * lg(response data length in bytes) + uint16 type; // request or response, client-side or server-side, etc. + // 16 bytes + + char method[8]; + // 8 bytes + + uint32 status; // 0 = success, other = error code + uint32 pad; // Sent as zero. Makes data 8B aligned + // 72 bytes to here + + uint8 data[0]; // [B2] byte length is in marker above + // [B3] off the end of data +} RPCHeader; + +// Struct just in memory +// An RPC is represented in memory as three pieces: marker, header and data. +// The RPC marker on the wire specifies the lengths of the other two, which are +// copied into the RPC struct for convenience. 
+// The RPC header specifies timestamps and which machine/method is being called, +// while the data is an arbitrary byte stream of arguments or results for the call. +typedef struct { + RPCHeader* header; + uint8* data; + int32 headerlen; + int32 datalen; +} RPC; + +// The main transmission routines +bool ReadExactlyLenBytes(int sockfd, uint8* buffer, int32 len); +bool ReadOneRPC(int sockfd, RPC* rpc, uint32* hash32); +bool SendOneRPC(int sockfd, const RPC* rpc, uint32* hash32); + +// Some utility routines +uint32 MarkerChecksum(const RPCMarker* marker); +bool ValidMarker(const RPCMarker* marker); +void ErrorBadMarker(const RPCMarker* marker); + +// Convert uint32 to single-byte 10 * lg(x) +uint8 TenLg(uint32 x); + +// Convert 10 * lg(x) back into x +uint64 TenPow(uint8 xlg); + +// Copy an RPC, copying all sub-pieces +void CopyRPC(const RPC* srcrpc, RPC* dstrpc); + +// Copy an RPC, copying header sub-piece, leaving dst data unchanged +void CopyRPCHeader(const RPC* srcrpc, RPC* dstrpc); + +// Copy an RPC, copying data sub-piece, leaving dst header unchanged +void CopyRPCData(const RPC* srcrpc, RPC* dstrpc); + +// Free the header and data previously allocated +bool FreeRPC(RPC* rpc); + +// Free just the data previously allocated +bool FreeRPCDataOnly(RPC* rpc); + +// Our simple delimited strings on the wire have a 4-byte length on the front +// We completely ignore endianness issues here +// Extract a delimited string from RPC data: length, string +string GetStringArg(const uint8** arg); + +// Insert a delimited buffer into RPC data: length, string +void PutStringRPC(const char* str, int strlen, RPC* rpc); + +// Insert two delimited buffers into RPC data: length, string, length, string +void PutStringRPC2(const char* str1, int str1len, const char* str2, int str2len, RPC* rpc); + +// Insert a delimited string into RPC data: length, string +void PutStringRPC(const string& str, RPC* rpc); + +// Insert two delimited strings into RPC data: length, string, length, string 
+void PutStringRPC2(const string& str1, const string& str2, RPC* rpc); + +// Fold 32-bit rpcid to 16-bit one +// 32-bit rpcid is never zero. If low bits are zero, use high bits +inline uint32 rpcid32_to_rpcid16(uint32 rpcid) { + uint32 tempid = rpcid & 0xffff; + return (tempid == 0) ? (rpcid >> 16) : tempid; +} + +// Fold 32-bit packet hash to 16-bit one +inline uint32 hash32_to_hash16(uint32 hash32) { + return (hash32 ^ (hash32 >> 16)) & 0xFFFF; +} + +#endif // __DCLAB_RPC_H__ + + + diff --git a/book-user-code/disk_readhog.cc b/book-user-code/disk_readhog.cc new file mode 100644 index 000000000000..02d7c7b618a5 --- /dev/null +++ b/book-user-code/disk_readhog.cc @@ -0,0 +1,119 @@ +// Little program to read disk/SSD continuously +// against it and observe the interference. +// +// Copyright 2021 Richard L. Sites +// +// Design goals: Run for about 60 seconds reading continuously +// + +// Usage: disk_readhog [MB to create] +// Compile with +// g++ -O2 disk_readhog.cc -o disk_readhog + +#include +#include // open +#include +#include +#include // open +#include // gettimeofday +#include // lseek +#include +#include // lseek + +#include "basetypes.h" +#include "polynomial.h" + +//static const size_t kReadBlockSize = 64 * 1024; // Read 64KB at a time +//static const int kBlocksPerMB = 16; +static const size_t kReadBlockSize = 256 * 1024; // Read 256KB at a time +static const int kBlocksPerMB = 4; + +// Return current time of day as seconds and fraction since January 1, 1970 +double GetSec() { + struct timeval tv; + gettimeofday(&tv, NULL); + return tv.tv_sec + (tv.tv_usec / 1000000.0); +} + +// ---- For creating a test file ---- +// Pad a string out to length using pseudo-random characters. 
+// x is a pseudo-random seed and is updated by this routine +// s is the input character string to be padded and must be allocated big enough +// to hold at least length characters +// curlen is the current length of s, bytes to be retained +// padded_len is the desired new character length +// If curlen >= padded_len, s is returned unchanged. Else it is padded. +// Returns s in both cases. +// DOES NOT return a proper c string with trailing zero byte +char* PadTo(uint32* x, char* s, int curlen, int padded_len) { + char* p = s + curlen; // First byte off the end; + for (int i = 0; i < (padded_len - curlen); ++i) { + if ((i % 5) == 0) { + *p++ = '_'; + } else { + *p++ = "abcdefghijklmnopqrstuvwxyz012345"[*x & 0x1f]; + *x = POLYSHIFT32(*x); + } + } + return s; +} + +// Creates a pseudo-random 1MB buffer and then writes it multiple times. +void MakeTestFile(const char* fname, int sizeinmb) { + char* temp = (char*)malloc(1024 * 1024); + FILE* f = fopen(fname, "wb"); + if (f == NULL) {fprintf(stderr, "%s did not open\n", fname); return;} + + uint32 randseed = POLYINIT32; + for (int i = 0; i < sizeinmb; ++i) { + PadTo(&randseed, temp, 0, 1024 * 1024); + fwrite(temp, 1, 1024 * 1024, f); + } + + fclose(f); + free(temp); +} +// ---- End For creating a test file ---- + +int main(int argc, const char** argv) { + // Get started + const char* filename = argv[1]; + if (argc > 2) { + // If extra arg, create test file of that many MB and exit + int mb_to_create = atoi(argv[2]); + MakeTestFile(filename, mb_to_create); + fprintf(stderr, "%dMB written to %s\n", mb_to_create, filename); + return 0; + } + + if (argc < 2) { + fprintf(stderr, "Usage: disk_readhog [MB to create]\n"); + return 0; + } + + char* buffer = (char*)malloc(kReadBlockSize + 4096); + char* aligned_buffer = (char*)((long unsigned int)buffer & ~0xFFFlu); + double total_start, total_elapsed; + int fd = open(filename, O_RDONLY | O_DIRECT); + if (fd < 0) {perror("disk_readhog open"); return 0;} + + total_start = 
GetSec(); + int block_count = 0; + ssize_t n = 0; + // Loop for 60 seconds + while (GetSec() < (total_start + 60.0)) { + lseek(fd, 0, SEEK_SET); + while ((n = read(fd, aligned_buffer, kReadBlockSize)) > 0) {++block_count;} + if (n < 0) {perror("disk_readhog read"); return 0;} + } + + // All done + total_elapsed = GetSec() - total_start; + int mb_read = block_count / kBlocksPerMB; + fprintf(stdout, "Elapsed time for %dMB %5.3f sec = %4.1fMB/sec\n", + mb_read, total_elapsed, mb_read / total_elapsed); + + close(fd); + return 0; +} + diff --git a/book-user-code/dumplogfile4.cc b/book-user-code/dumplogfile4.cc new file mode 100644 index 000000000000..20cf7777ab87 --- /dev/null +++ b/book-user-code/dumplogfile4.cc @@ -0,0 +1,212 @@ +// dumplogfile4.cc cloned from dumplogfile.cc 2018.04.16 +// Little program to dump a binary log file +// Copyright 2021 Richard L. Sites +// +// compile with g++ -O2 dumplogfile4.cc dclab_log.cc -o dumplogfile4 +// +// expect filename(s) to be of form +// client4_20180416_151126_dclab-1_3162.log +// +// Hex dump a log file: +// od -Ax -tx4z -w32 foo.log +// + +#include +#include + +#include "dclab_log.h" + +// Assumed Ethernet speed in gigabits per second +static const int64 kGbs = 1; + +// Assumed RPC message overhead, in addition to pure data +static const int64 kMsgOverheadBytes = 100; + +// Assumed time for missing transmission or server time, in usec +static const int kMissingTime = 2; + +static char gTempBuffer[24]; + +// 2**0.0 through 2** 0.9 +static const double kPowerTwoTenths[10] = { + 1.0000, 1.0718, 1.1487, 1.2311, 1.3195, + 1.4142, 1.5157, 1.6245, 1.7411, 1.8661 +}; + +int64 imax(int64 a, int64 b) {return (a >= b) ? 
a : b;} + +// return 2 * (x/10) +int64 ExpTenths(uint8 x) { + int64 powertwo = x / 10; + int64 fraction = x % 10; + int64 retval = 1l << powertwo; + retval *= kPowerTwoTenths[fraction]; + return retval; +} + + +// Return sec to transmit x bytes at y Gb/s, where 1 Gb/s = 125000000 B/sec +// but we assume we only get about 90% of this for real data, so 110 B/usec +int64 BytesToUsec(int64 x) { + int64 retval = x * kGbs / 110; + return retval; +} + +int64 RpcMsglglenToUsec(uint8 lglen) { + return BytesToUsec(ExpTenths(lglen) + kMsgOverheadBytes); +} + + +// Turn seconds since the epoch into yyyy-mm-dd_hh:mm:ss +// Not valid after January 19, 2038 +const char* FormatSecondsDateTimeLong(int64 sec) { + // if (sec == 0) {return "unknown";} // Longer spelling: caller expecting date + time_t tt = sec; + struct tm* t = localtime(&tt); + sprintf(gTempBuffer, "%04d-%02d-%02d_%02d:%02d:%02d", + t->tm_year + 1900, t->tm_mon + 1, t->tm_mday, + t->tm_hour, t->tm_min, t->tm_sec); + return gTempBuffer; +} + +void PrintJsonHeader(FILE* f, int64 basetime, const char* title) { + // Convert usec to sec and format date_time + const char* base_char = FormatSecondsDateTimeLong(basetime / 1000000); + // Leading spaces force header lines to sort to front + fprintf(f, " {\n"); + fprintf(f, " \"Comment\" : \"V4 flat RPCs\",\n"); + fprintf(f, " \"axisLabelX\" : \"Time (sec)\",\n"); + fprintf(f, " \"axisLabelY\" : \"RPC Number\",\n"); + fprintf(f, " \"deltaT23\" : 0,\n"); + fprintf(f, " \"flags\" : 0,\n"); + fprintf(f, " \"gbs\" : 1,\n"); + fprintf(f, " \"shortMulX\" : 1,\n"); + fprintf(f, " \"shortUnitsX\" : \"s\",\n"); + fprintf(f, " \"thousandsX\" : 1000,\n"); + fprintf(f, " \"title\" : \"%s\",\n", title); + fprintf(f, " \"tracebase\" : \"%s\",\n", base_char); + fprintf(f, " \"version\" : 4,\n"); + fprintf(f, "\"events\" : [\n"); +} + +void PrintJsonFooter(FILE* f) { + fprintf(f, "[999.0, 0.0, 0.0, 0.0, \"\", \"\", 0.0, 0.0, 0, 0, \"\", \"\", \"\", 0, \"\"]\n"); + fprintf(f, "]}\n"); +} + 
+void usage() { + fprintf(stderr, "Usage: dumplogfile4 [-all] [-req] \"title\" \n"); + fprintf(stderr, " By default, only complete (client type RespRcv) transactions are dumped.\n"); + fprintf(stderr, " Use -all to see incomplete transactions (server side are all incomlete).\n"); +} + +static const int kMaxFileNames = 100; + +int main(int argc, const char** argv) { + bool dump_raw = false; + bool dump_all = false; + bool dump_req = false; + int next_fname = 0; + const char* fname[kMaxFileNames]; + const char* title = NULL; + + // Pick up arguments + for (int i = 1; i < argc; ++i) { + if (argv[i][0] != '-') { + if (title == NULL) { + title = argv[i]; + } else { + fname[next_fname++] = argv[i]; + if (next_fname >= kMaxFileNames) { + fprintf(stderr, "More than %d file names.\n", kMaxFileNames); + return 0; + } + } + } else if (strcmp(argv[i], "-raw") == 0) { + dump_raw = true; + } else if (strcmp(argv[i], "-all") == 0) { + dump_all = true; + } else if (strcmp(argv[i], "-req") == 0) { + dump_req = true; + } else { + usage(); + return 0; + } + } + + if (next_fname == 0) { + usage(); + return 0; + } + + if (title == NULL) {title = "Placeholder title";} + + FILE* logfile; + BinaryLogRecord lr; + int64 basetime = 0; // In usec + // Process log files in order presented + for (int i = 0; i < next_fname; ++i) { + logfile = fopen(fname[i], "rb"); + if (logfile == NULL) { + fprintf(stderr, "%s did not open\n", fname[i]); + return 0; + } + + // Always dump complete transactions, from client-side logs RespRcvType + // If -req, dump completed server-side requests RespSendType + // If -all, dump all log records + while(fread(&lr, sizeof(BinaryLogRecord), 1, logfile) != 0) { + bool dumpme = false; + if (dump_all) {dumpme = true;} + if (dump_req && lr.type == RespSendType) {dumpme = true;} + if (lr.type == RespRcvType) {dumpme = true;} + if (!dumpme) {continue;} + + // Pick off base time at first RPC + if ((basetime == 0) && (lr.req_send_timestamp != 0)) { + // Round down usec time 
to multiple of one minute + basetime = (lr.req_send_timestamp / 60000000) * 60000000; + PrintJsonHeader(stdout, basetime, title); + } + + // Estimated network transmission times + int64 est_req_usec = RpcMsglglenToUsec(lr.lglen1); + int64 est_resp_usec = RpcMsglglenToUsec(lr.lglen2); + + if (!dump_raw) { + // Fill in any missing times (incomlete RPCs) + // Missing t2 etc. must include estimated transmission time + // Times in usec + if (lr.req_rcv_timestamp == 0) { + lr.req_rcv_timestamp = lr.req_send_timestamp + est_req_usec + kMissingTime; + } + if (lr.resp_send_timestamp == 0) { + lr.resp_send_timestamp = lr.req_rcv_timestamp + kMissingTime; + } + if (lr.resp_rcv_timestamp == 0) { + lr.resp_rcv_timestamp = lr.req_send_timestamp + + (lr.resp_send_timestamp - lr.req_rcv_timestamp) + + est_req_usec + kMissingTime + est_resp_usec + kMissingTime; + } + + // Enforce that nonzero times are non-decreasing + if (lr.req_rcv_timestamp != 0) { + lr.req_rcv_timestamp = imax(lr.req_rcv_timestamp, lr.req_send_timestamp); + } + if (lr.resp_send_timestamp != 0) { + lr.resp_send_timestamp = imax(lr.resp_send_timestamp, lr.req_rcv_timestamp); + } + if (lr.resp_rcv_timestamp != 0) { + lr.resp_rcv_timestamp = imax(lr.resp_rcv_timestamp, lr.resp_send_timestamp); + } + } + + PrintLogRecordAsJson(stdout, &lr, basetime); + } + fclose(logfile); + } + PrintJsonFooter(stdout); + + return 0; +} + diff --git a/book-user-code/eventtospan3.cc b/book-user-code/eventtospan3.cc new file mode 100644 index 000000000000..57fd4da776e1 --- /dev/null +++ b/book-user-code/eventtospan3.cc @@ -0,0 +1,2997 @@ +// Little program to turn sorted Ascii event listings into timespans +// covering 100% of the time on each CPU core +// The main work is tracking returns and dealing with missing events +// Copyright 2021 Richard L. 
Sites +// + +// 2021.10.21 Redefine PSTATE as sample *after* the CPU frequency has changed (x86), +// PSTATE2 as notify *before* the CPU frequency has changed (RPi4) +// TODO: determine if PSTATE2 applies to all cores or just the one. + + +// TODO: +// If send/receive on same machine, transmission will be faster than over Ether +// - if tx.XYZW and then rx.XYZW, transmission time is no more than ts difference +// - in that case, tx and rx overlap in time and are short +// +// move position of event to just after duration +// accept exported names +// upgrade ts and duration to uint64 instead of int +// Ignore comments starting with # +// Expand span to include arg retval name +// change output to be json +// dick sites 2017.08.21 +// add mwait-implied spans +// dick sites 2017.08.21 +// Attach initial pid name to first entry's CPU stack +// dick sites 2017.11.18 +// add optional instructions per cycle IPC support +// dick sites 2018.06.14 +// Take another run at merging in PID names +// dick sites 2020.01.21 +// Reorganize around pre-process inserting extra events as needed +// +// dsites 2019.03.11 +// Grab PID from front of each traceblock +// Move standalone return value into call span's return value +// dsites 2019.05.12 +// shorten AMD mwait to 13.3 usec +// allow irq within BH +// do not pop so soon on ctx inside sched +// dummy push change to alter current span, back to its start +// dsites 2019.10.29 +// Add waiting spans +// dsites 2020.01.22 +// Rewrite and restructuring +// 2020.01.30 dsites Add PC samples +// 2020.02.01 dsites Add execution and profile aggregation +// 2020.02.04 dsites Have PC samples represent time before sample, not after +// 2020.07.13 dsites Have user process continue thru end of sched ctx switch +// to avoid bogus wait_* spans. Also see NestLevel +// 2020.07.13 dsites Address ambiguity of syscall/fault exit vs. 
block-resume +// 2020.08.19 dsites Add random traceid +// 2020.09.29 dsites Add lock lines +// 2020.11.01 dsites Turn raw filtered packet events into matching RPC packet spans +// 2020.11.06 dsites Fix corrupted idle stacked state at context switch +// 2020.11.13 dsites Added processing for RPC-to-packet correlation +// 2020.11.13 dsites Added codepoint for net speed, default to 1 Gbs +// 2021.01.26 dsites Redo RPC-to-packet correlation with hash16 +// 2021.01.28 dsites Complete redo of RPC-to-packet correlation with hash32 again +// 2021.02.02 dsites Carry RPCid across ctx switch and back +// 2021.02.03 dsites Add queue names, enqueue/dequeue spans +// 2021.10.21 dsites Add pstate2 for Raspberry Pi +// 2021.10.22 dsites Chanfe mwait to wfi for Raspberry Pi + +// Compile with g++ -O2 eventtospan3.cc -o eventtospan3 + + +/*TODO: + spantospan new ipc + spantotrim new ipc +*/ + +#include +#include + +#include +#include // exit, random +#include +#include +#include // getpid gethostname +#include // gettimeofday +#include + +#include "basetypes.h" +#include "kutrace_control_names.h" +#include "kutrace_lib.h" + +// Event numbers or related masks +#define call_mask 0xc00 +#define call_ret_mask 0xe00 +#define ret_mask 0x200 +#define type_mask 0xf00 + +// Names 001..1ff +// Point events 200..3ff +#define dummy_trap 0x4ff +#define dummy_irq 0x5ff +#define dummy_syscall 0x9ff +#define largest_non_pid 0xfff +#define pid_idle 0 +#define event_idle (0x10000 + pid_idle) +#define event_c_exit 0x20000 + +#define sched_syscall 0x9ff +#define sched_sysret 0xbff + +#define ipc_mask 0x0f + + +// Additional drawing events +#define ArcNum -3 + +static const char* kIdleName = "-idle-"; +static const char* kIdlelpName = "-idlelp-"; +static const int kMAX_CPUS = 80; +static const int kNetworkMbitSec = 1000; // Default: 1 Gb/s if not in trace + +static const uint64 kMIN_CEXIT_DURATION = 10LL; // 0.100 usec in multiples of 10 nsec +static const uint64 kMIN_WAIT_DURATION = 10LL; // 0.100 usec 
in multiples of 10 nsec +static const uint64 kMAX_PLAUSIBLE_DURATION = 800000000LL; // 8 sec in multiples of 10 nsec +static const uint64 kONE_MINUTE_DURATION = 6000000000LL; // 60 sec in multiples of 10 nsec +static const uint64 kONE_HOUR = 360000000000LL; // 3600 sec in multiples of 10 nsec + +// We allow 26 waiting reasons, a-z, each displayed as Morse code +static const char* kWAIT_NAMES[26] = { + "wait_a", "wait_b", "wait_cpu", "wait_disk", + "wait_e", "wait_f", "wait_g", "wait_h", + "wait_i", "wait_j", "wait_task", "wait_lock", + "wait_mem", "wait_net", "wait_o", "wait_pipe", + "wait_q", "wait_rcu", "wait_sche", "wait_time", + "wait_u", "wait_v", "wait_w", "wait_x", + "wait_y", "wait_unk", +}; + +// ./drivers/idle/intel_idle.c +// "C1-HSW", 0x00, .exit_latency = 2, // times 100ns ? +// "C1E-HSW", 0x01, .exit_latency = 10, +// "C3-HSW", 0x10, .exit_latency = 33, +// "C6-HSW", 0x20, .exit_latency = 133, +// "C7s-HSW", 0x32, .exit_latency = 166, +// "C8-HSW", 0x40, .exit_latency = 300, +// "C9-HSW", 0x50, .exit_latency = 600, +// "C10-HSW", 0x60, .exit_latency = 2500, + +// Time for coming out of idle deep sleep +// Table entries are unspecified units; assume for the moment multiples of 100ns +static const int kLatencyTable[256] = { + 2, 10, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, + 133,133,133,133, 133,133,133,133, 133,133,133,133, 133,133,133,133, + 166,166,166,166, 166,166,166,166, 166,166,166,166, 166,166,166,166, + + 300,300,300,300, 300,300,300,300, 300,300,300,300, 300,300,300,300, + 600,600,600,600, 600,600,600,600, 600,600,600,600, 600,600,600,600, + 2500,2500,2500,2500, 2500,2500,2500,2500, 2500,2500,2500,2500, 2500,2500,2500,2500, + 2500,2500,2500,2500, 2500,2500,2500,2500, 2500,2500,2500,2500, 2500,2500,2500,2500, + + 2500,2500,2500,2500, 2500,2500,2500,2500, 2500,2500,2500,2500, 2500,2500,2500,2500, + 2500,2500,2500,2500, 2500,2500,2500,2500, 2500,2500,2500,2500, 
2500,2500,2500,2500, + 2500,2500,2500,2500, 2500,2500,2500,2500, 2500,2500,2500,2500, 2500,2500,2500,2500, + 2500,2500,2500,2500, 2500,2500,2500,2500, 2500,2500,2500,2500, 2500,2500,2500,2500, + + 2500,2500,2500,2500, 2500,2500,2500,2500, 2500,2500,2500,2500, 2500,2500,2500,2500, + 2500,2500,2500,2500, 2500,2500,2500,2500, 2500,2500,2500,2500, 2500,2500,2500,2500, + 2500,2500,2500,2500, 2500,2500,2500,2500, 2500,2500,2500,2500, 2500,2500,2500,2500, + 2500,2500,2500,2500, 2500,2500,2500,2500, 2500,2500,2500,2500, 2500,2500, 2, 133, + // [254] RPI4-B wfi() guess + // [255] AMD mwait guess +}; + +// 2**0.0 through 2** 0.9 +static const double kPowerTwoTenths[10] = { + 1.0000, 1.0718, 1.1487, 1.2311, 1.3195, + 1.4142, 1.5157, 1.6245, 1.7411, 1.8661 +}; + + +using std::map; +using std::multimap; +using std::string; + + +// Per-PID short stack of events to return to. +// These are saved/restored when a thread, i.e. pid, is context switched out +// and later starts running again, possibly on another CPU. 
+// stack[0] is always a user-mode pid +// stack[1] is a system call or interrupt or fault +// stack[2] and [3] are nested interrupts/faults +// stack[4] can be scheduler +// +// +---------------+ +// | ambiguous | +// +---------------+ +// | rpcid | +// +---------------+ +// | enque_num | +// +---------------+ +// | deque_num | +// +---------------+ +// | top = 0..4 | +// +---------------+ +// +---------------+ +-------------------------------+ +// | eventnum | | name | 0 user +// +---------------+ +-------------------------------+ +// | eventnum | | name | 1 syscall +// +---------------+ +-------------------------------+ +// | eventnum | | name | 2 fault +// +---------------+ +-------------------------------+ +// | eventnum | | name | 3 interrupt +// +---------------+ +-------------------------------+ +// | eventnum | | name | 4 scheduler +// +---------------+ +-------------------------------+ +// +typedef struct { + int ambiguous; // Nonzero=True if scheduler runs within syscall/fault/IRQ + // Not clear if scheduler exit returns or goes to user code + // In this case, record subscript of ambiguous stack entry + // See FixupAmbiguousSpan for details. + int rpcid; // Current RPC id for this PID. 
Overrides event.rpcid + int enqueue_num_pending; // For piecing together RPC waiting in a queue (-1 = inactive) + int dequeue_num_pending; // For piecing together RPC waiting in a queue (-1 = inactive) + int top; // Top of our small stack + int eventnum[5]; // One or more event numbers that are stacked calls + string name[5]; // One or more event names that are stacked calls +} PidState; + + + +// Event, from rawtoevent +// +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----------+ +// | ts | dur | cpu | pid | rpc |event| arg | ret | ipc | name | +// +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----------+ +// + +// Span, from start/end events +// After StartSpan +// +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----------+ +// | ts | /// | cpu | pid | rpc |event| arg | ret | /// | name | +// +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----------+ +// After FinishSpan +// +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----------+ +// | ts | dur | cpu | pid | rpc |event| arg | ret | ipc | name | +// +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----------+ +// +typedef struct { + uint64 start_ts; // Multiples of 10 nsec + uint64 duration; // Multiples of 10 nsec + int cpu; + int pid; + int rpcid; // For incoming events, this is bogus except in RPCIDREQ/RESP + int eventnum; + int arg; + int retval; + int ipc; + string name; +} OneSpan; + + +// RPC correlation, one entry per pid +// k_timestamp is filled in by kernel TX_PKT (rpcid is known), else +// by RX_USER (rpcid is known) copying from hash entry +// Using the PID, this accumulates three pieces from kernel/user/rpcid entries +typedef struct { + uint64 k_timestamp; // Time kernel code saw hash32. 
0 means not known yet + uint32 rpcid; // 0 means not known yet + uint16 lglen8; // 0 means not known yet + bool rx; // true if rx +} PidCorr; + +// RPC correlation, Packet or message hash to PID correlation, one entry per hash32 +// Using the common hash32 value, this carries ts or pid between kernel/user packet entries +typedef struct { + uint64 k_timestamp; // Time kernel code saw hash32. 0 means not known yet + uint32 pid; // 0 means not known yet +} HashCorr; + + +// Contended-lock pending since ts with lock held by pid (-1 if unknown) +typedef struct { + uint64 start_ts; + int pid; + int eventnum; +} LockContend; + + +// Per-CPU state: M sets of these for M CPUs +// +---------------+ +// | cpu_stack o-|--> current thread's PidState w/return stack +// +---------------+ saved and restored across context switches +// cur_span: +// +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----------+ +// | ts | dur | cpu | pid | rpc |event| arg | ret | ipc | name | +// +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----------+ +// +---------------+ +// |prior_pstate_ts| +// +---------------+ +// | ... 
_freq| +// +---------------+ +// |prior_pc_samp_t| +// +---------------+ +// | ctx_switch_ts | +// +---------------+ +// | mwait_pending | +// +---------------+ +// | oldpid | +// +---------------+ +// | newpid | +// +---------------+ +// | valid_span | +// +---------------+ +// +typedef struct { + PidState cpu_stack; // Current call stack & span for each of kMAX_CPUS CPUs + OneSpan cur_span; + uint64 prior_pstate_ts; // Used to assign duration to each pstate (CPU clock freq) + uint64 prior_pstate_freq; // Used to assign frequency to each pstate2 span + uint64 prior_pc_samp_ts; // Used to assign duration to each PC sample + uint64 ctx_switch_ts; // Used if /sched is missing + int mwait_pending; // eax value 00..FF, from mwait event.arg + int oldpid; // The pid on this CPU just before a context switch + int newpid; // The (current) pid on this CPU just after a context switch + // Above two are used at scheduler exit if a wakeup of oldpid + // occurs *during* scheduling + bool valid_span; // Not valid span at beginning of trace +} CPUState; + +// +// Globals across all CPUs +// +typedef map PerPidState; // State of each suspended task, by PID +typedef map IntName; // Name for each PID/lock/method +typedef map PidWakeup; // Previous wakeup event, by PID +typedef map PidTime; // Previous per-PID timestamp (span end, kernel-seen packet) +typedef map PidLock; // Previous per-PID lock hash number +typedef map PidHash32; // Previous per-PID pending user packet hash number +typedef map PidRunning; // Set of currently-running PIDs +typedef map LockPending; // Previous lock try&fail event, by lockhash&pid + // Multiple threads can be wanting the same lock +typedef map PidToCorr; // pid to +typedef map HashToCorr; // hash32 to +typedef map RpcQueuetime; // rpcid to enqueue timestamp + + +// RPC-to-packet correlation +// +// This elaborate-looking song-and-dance came about because I do not want +// the kernel code to know anything about dclab-specific RPC message formats, +// 
but do want to correlate the user-mode arrival of a message with the +// first packet's arrival at kernel code. +// +// The remaining packets of a long message are either not traced +// (based on the loadable module filter parameters), +// or are traced and ignored here because they do not have +// a hash over the first 32 bytes that match a specific dclab RPC message. +// +// For each RPC, the trace has a METHODNAME entry: +// +// +-------------------+-----------+-------------------------------+ +// | timestamp | event | rpcid | (0) +// +-------------------+-----------+-------------------------------+ +// | character name, 1-56 bytes, NUL padded | +// +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -+ +// ~ ~ +// +---------------------------------------------------------------+ +// +// A trace may contain entries generated within the kernel TCP and UDP +// processing that identify and timestamp selected packets. The entries +// are KUTRACE_RX_PKT and KUTRACE_TX_PKT with this format: +// +// +-------------------+-----------+---------------+-------+-------+ +// | timestamp 1 | event | hash32 | (1) +// +-------------------+-----------+---------------+-------+-------+ +// 20 12 8 8 16 +// +// The hash is a four-byte-wide XOR over the first 32 bytes of the packet payload +// starting at the byte just after the TCP or UDP header. Selected packets have +// at least 32 bytes of payload, and they pass a simple filter test over the +// first 24 bytes of the payload, based on 24 bytes of mask and 8 bytes of +// expected four-byte-wide XOR value after masking. The algorithm is compiled +// into the kernel TCP and UDP code, but the mask and expected values are +// supplied to the kutrace loadable module at its startup. +// +// The default filter picks off just packets that start with the dclab_rpc.h +// kMarkerSignature value. All packets and no packets are also choices, along +// with other combinations of user-specified byte masks and expected XOR value. 
+// +// The user-mode dclab RPC routines generate RPC request/response events. +// +// The working-on-RPC events KUTRACE_RPCIDREQ and KUTRACE_RPCIDRESP have this +// format: +// +-------------------+-----------+---------------+-------+-------+ +// | timestamp 2 | event | lglen8 | RPCid | (2) +// +-------------------+-----------+---------------+-------+-------+ +// 20 12 8 8 16 +// +// where lglen8 is ten times the log base 2 of the entire message length +// (messages are often multiple packets). A non-zero RPCid specifies working +// on some RPC while zero specifies working on no RPC. +// +// The first mention of an incoming RPC message may also have a hash32 value +// taken over the first 32 bytes of the message -- the dclab RPC marker and +// initial part of the RPC header. The entries are KUTRACE_RX_USER and +// KUTRACE_TX_USER with the same format as the kernel entries above: +// +// +-------------------+-----------+---------------+-------+-------+ +// | timestamp 3 | event | hash32 | (3) +// +-------------------+-----------+---------------+-------+-------+ +// 20 12 8 8 16 +// +// This value can be used with high probability to find a matching previous +// RX_PKT or following TX_PKT entry (1), which would then give the time at which +// the first packet of a message was processed in the kernel code. 
Combined with +// the lglen value, this can be used to approximate when the entire message +// occupied the network link, shown by the synthetic KUTRACE_RPCIDRXMSG and +// KUTRACE_RPCIDTXMSG text events created here: +// +// TS DUR EVENT CPU PID RPC ARG0 RETVAL IPC NAME +// timestamp1 dddd 516 0 0 rrrr len 0 0 method.rrrr (204) +// +// (ts and dur multiples of 10ns): +// +// and where timestamp1 comes from the kernel KUTRACE_RX_PKT event (1), +// the rpcid rrrr and message length len come from the user +// KUTRACE_RPCIDREQ/RESP event (2), the method name comes from the usual +// KUTRACE_METHODNAME event, and the duration dddd comes from a calculation +// based on the message length and network bitrate NetworkMbPerSec which +// currently just defaults to 1000 Mb/s but in the future will come from +// the trace itself. +// +// If both tx and rx are on the same computer, no Ether is used, so transmission +// time is very fast, bounded by matching kernel tx and rx timestamps. +// The current code does not correct for this. +// +// Note that the input events (0) (1) (2) (3) may appear in the trace only in +// the order described below, and may be created on different CPUs. +// The code caters to that. +// +// There will be occasional duplicates of the 16-bit values involved. This will +// sometimes create false reconstructions of packet traffic. 
//
// Expected events and their data
//   a: rpcid
//   b: method name
//   c: length
//   d: hash32
//   e: kernel-seen timestamp
//   f: pid
//
// client request:  METHODNAME(ab), RPCIDREQ(acf), TX_USER(df), TX_PKT(de); put out TXMSG(abce)
// server request:  RX_PKT(de), RX_USER(df), METHODNAME(ab), RPCIDREQ(acf); put out RXMSG(abce)
// server response: RPCIDRESP(acf), TX_USER(df), TX_PKT(de); put out TXMSG(abce)
// client response: RX_PKT(de), RX_USER(df), RPCIDRESP(acf); put out RXMSG(abce)
//
// 2021.01.28 BUGs
//   rx/tx widths wrong
//   packet length and position is correct for tx
//


// global queue names
IntName queuenames;             // small_int => queue name definitions
                                // (read by MakeQueuedSpan to name queued spans)
RpcQueuetime enqueuetime;       // rpcid => enqueue time

// RPC global method names
IntName methodnames;            // rpcid => method name definitions
                                // (read by MakeRpcidMidSpan to build "method.rpcid")

// Pending RPC globals -- what we know about them so far. Transient across short sequences
// of the events above
PidToCorr pidtocorr;            // One process can only be doing one message RX/TX at once
                                // Presence of a pid here is what distinguishes an incoming
                                // from an outgoing RPCIDREQ/RESP (see IsIncomingRpcReqResp)
HashToCorr rx_hashtocorr;       // Low-level Kernel/user can be doing multiple overlapping
HashToCorr tx_hashtocorr;       //  packets at once
static const PidCorr initpidcorr = {0, 0, 0, false};
static const HashCorr inithashcorr = {0, 0};

// Globals
bool verbose = false;
bool trace = false;
bool rel0 = false;
bool is_rpi = false;            // True for Raspberry Pi

// Strings below are written into the JSON header by InitialJson when non-empty
string kernel_version;
string cpu_model_name;
string host_name;
int mbit_sec = kNetworkMbitSec; // Default; divisor used by msg_dur_usec/msg_dur_10nsec
int max_cpu_seen = 0;           // Keep track of how many CPUs there are


static uint64 span_count = 0;           // Incremented for every row written by
                                        // WriteSpanJson2 and WriteEventJson
static int incoming_version = 0;        // Incoming version number, if any, from ## VERSION: 2
static int incoming_flags = 0;          // Incoming flags, if any, from ## FLAGS: 128
IntName pidnames;                       // Current name for each PID, by pid#
IntName pidrownames;                    // Collected names for each PID (clone, execve, etc.
rename a thread), by pid# +PidWakeup pendingWakeup; // Any pending wakeup event, to make arc from wakeup to running +PidWakeup priorPidEvent; // Any prior event for each PID, to make wait_xxx display +PidTime priorPidEnd; // Any prior span end for each PID, to make wait_xxx display +PidLock priorPidLock; // Any prior lock hash number for each PID, to make wait_xxx display +IntName locknames; // Incoming lock name definitions +LockPending lockpending; // pending KUTRACE_LOCKNOACQUIRE/etc. events, by lock hash +PidWakeup pendingLock; // Any pending lock acquire event, to make wait_lock from try to acquire +PidTime pendingKernelRx; // Any pending RX_PKT time, waiting for subsequent RPCID +PidRunning pidRunning; // Set of currently-running PIDs + // A PID is running from the /sched that context switches to it until + // the /sched that context switches away from it. It is thus running + // during all of that second context switch. Any wakeup delivered while + // it is running creates no waiting before that wakeup. + +// Stats +double total_usermode = 0.0; +double total_idle = 0.0; +double total_kernelmode = 0.0; +double total_other = 0.0; + + +// Fold 32-bit rpcid to 16-bit one +// 32-bit rpcid is never zero. If low bits are zero, use high bits +inline uint32 rpcid32_to_rpcid16(uint32 rpcid) { + uint32 tempid = rpcid & 0xffff; + return (tempid == 0) ? (rpcid >> 16) : tempid; +} + +// Simple tests +uint64 uint64min(uint64 a, uint64 b) {return (a < b) ? a : b;} +uint64 uint64max(uint64 a, uint64 b) {return (a > b) ? 
a : b;} + + +// Events fall into five broad categories: +// (1) Text names for events +// (2) Point events +// (3) Kernel-mode execution +// (4) User-mode execution +// (5) Output-only HTML drawing events, not found in input raw binary traces + +// (1) Any name definition +bool IsNamedef(int eventnum) { + return (KUTRACE_VARLENLO <= eventnum) && (eventnum <= KUTRACE_VARLENHI); +} + +// (2) Any point event 0x200 ..0x3FF +// userpid, rpc, runnable, ipi, mwait, pstate, mark, lock, [not special:] pc, wait +bool IsAPointEvent(const OneSpan& event) { + return ((KUTRACE_USERPID <= event.eventnum) && (event.eventnum < KUTRACE_TRAP)); +} + +// (3) Any kernel-mode execution event +bool IsKernelmode(const OneSpan& event) { + return ((KUTRACE_TRAP <= event.eventnum) && (event.eventnum < event_idle)); +} +bool IsKernelmodenum(int eventnum) { + return ((KUTRACE_TRAP <= eventnum) && (eventnum < event_idle)); +} + +// (4) Any user-mode-execution event, in range 0x10000 .. 0x1ffff +// These includes the idle task +bool IsUserExec(const OneSpan& event) { + return ((event.eventnum & 0xF0000) == 0x10000); +} +bool IsUserExecnum(int eventnum) { + return ((eventnum & 0xF0000) == 0x10000); +} + +bool IsCExitnum(int eventnum) { + return (eventnum == 0x20000); +} + +// True if the event means we must be executing in kernel mode +bool OnlyInKernelMode(const OneSpan& event) { + if ((event.eventnum & 0xF00) == KUTRACE_TRAPRET) {return true;} // Returns 6xx + if ((event.eventnum & 0xF00) == KUTRACE_IRQRET) {return true;} // 7xx + if ((event.eventnum & 0xE00) == KUTRACE_SYSRET64) {return true;} // Axx, Bxx + if ((event.eventnum & 0xE00) == KUTRACE_SYSRET32) {return true;} // Exx, Fxx + if (event.eventnum == KUTRACE_USERPID) {return true;} // context switch + if (event.eventnum == KUTRACE_RUNNABLE) {return true;} // make runnable + if (event.eventnum == KUTRACE_IPI) {return true;} // send IPI + if (event.eventnum == KUTRACE_PSTATE) {return true;} // current CPU clock frequency + if 
(event.eventnum == KUTRACE_PSTATE2) {return true;} // current CPU clock frequency + if (event.eventnum == KUTRACE_PC_K) {return true;} // kernel-mode PC sample in timer irq + if (event.eventnum == KUTRACE_PC_U) {return true;} // user-mode PC sample in timer irq 2020.11.06 + if (event.eventnum == sched_syscall) {return true;} // only kernel can call the scheduler + if (event.eventnum == sched_sysret) {return true;} // only kernel can return from the scheduler + return false; +} + +// True if the event means we must be executing in user mode +bool OnlyInUserMode(const OneSpan& event) { + if (event.eventnum == sched_syscall) {return false;} // only kernel call the scheduler + if ((event.eventnum & 0xE00) == KUTRACE_SYSCALL64) {return true;} // Calls 8xx, 9xx + if ((event.eventnum & 0xE00) == KUTRACE_SYSCALL32) {return true;} // Calls Cxx, Dxx + if (event.eventnum == KUTRACE_MWAIT) {return true;} // mwait (in idle loop); so not inside call/irq/fault + if (event.eventnum == KUTRACE_MARKA) {return true;} // Marks [actually, these *could* be used in kernel] + if (event.eventnum == KUTRACE_MARKB) {return true;} // Marks + if (event.eventnum == KUTRACE_MARKC) {return true;} // Marks + if (event.eventnum == KUTRACE_MARKD) {return true;} // Marks + return false; +} + +// (5) Output-only HTML drawing events +// KUTRACE_WAITA .. KUTRACE_WAITZ for the various reasons a process or RPC waits + + +// Refinements --------------------------------------------- + +// (1) Lock-name event, etc. 
+bool IsLockNameInt(int eventnum) { + return ((eventnum & 0xF0F)== KUTRACE_LOCKNAME); +} +bool IsKernelVerInt(int eventnum) { + return ((eventnum & 0xF0F) == KUTRACE_KERNEL_VER); +} +bool IsModelNameInt(int eventnum) { + return ((eventnum & 0xF0F) == KUTRACE_MODEL_NAME); +} +bool IsHostNameInt(int eventnum) { + return ((eventnum & 0xF0F) == KUTRACE_HOST_NAME); +} +bool IsMethodNameInt(int eventnum) { + return ((eventnum & 0xF0F) == KUTRACE_METHODNAME); +} +bool IsQueueNameInt(int eventnum) { + return ((eventnum & 0xF0F) == KUTRACE_QUEUE_NAME); +} +bool IsPidNameInt(int eventnum) { + return ((eventnum & 0xF0F) == KUTRACE_PIDNAME); +} + +// (2) UserPid point event +bool IsAContextSwitch(const OneSpan& event) { + return (event.eventnum == KUTRACE_USERPID); +} + +// (2) Make-runnable point event +bool IsAWakeup(const OneSpan& event) { + return (event.eventnum == KUTRACE_RUNNABLE); +} + +// (2) Mwait point event +bool IsAnMwait(const OneSpan& event) { + return (event.eventnum == KUTRACE_MWAIT); +} + +// (2) mark point event +bool IsAMark(const OneSpan& event) { + return ((KUTRACE_MARKA <= event.eventnum) && (event.eventnum <= KUTRACE_MARKD)); +} +// (2) lock point event 0x210 ..0x212 +bool IsALockOneSpan(const OneSpan& event) { + return ((KUTRACE_LOCKNOACQUIRE <= event.eventnum) && (event.eventnum <= KUTRACE_LOCKWAKEUP)); +} +// (2) pstate point event +bool IsAPstate(const OneSpan& event) { + return ((event.eventnum == KUTRACE_PSTATE) || (event.eventnum == KUTRACE_PSTATE2)); +} +// (2) pc_sample point event +bool IsAPcSample(const OneSpan& event) { + return ((event.eventnum == KUTRACE_PC_U) || (event.eventnum == KUTRACE_PC_K) || (event.eventnum == KUTRACE_PC_TEMP)); +} +// (2) pc_sample point event +bool IsAPcSamplenum(int eventnum) { + return ((eventnum == KUTRACE_PC_U) || (eventnum == KUTRACE_PC_K) || (eventnum == KUTRACE_PC_TEMP)); +} + +// (2) RPC point event: REQ RESP MID with optional lglen8 +bool IsAnRpc(const OneSpan& event) { + return ((KUTRACE_RPCIDREQ <= 
event.eventnum) && (event.eventnum <= KUTRACE_RPCIDMID)); +} +// (2) RPC point event: REQ or RESP +bool IsRpcReqRespInt(int eventnum) { + return ((eventnum == KUTRACE_RPCIDREQ) || (eventnum == KUTRACE_RPCIDRESP)); +} +// (2) RPC network message, giving approximate time on the wire +bool IsAnRpcMsg(const OneSpan& event) { + return ((KUTRACE_RPCIDRXMSG <= event.eventnum) && (event.eventnum <= KUTRACE_RPCIDTXMSG)); +} +// (2) enque point event +bool IsAnEnqueue(const OneSpan& event) { + return (KUTRACE_ENQUEUE == event.eventnum); +} +// (2) deque point event +bool IsADequeue(const OneSpan& event) { + return (KUTRACE_DEQUEUE == event.eventnum); +} + + +// Return true if the event is raw kernel packet receive/send time and hash +bool IsRawPktHashInt(int eventnum) { + return ((eventnum == KUTRACE_RX_PKT) || (eventnum == KUTRACE_TX_PKT)); +} + +// Kernel code sees a packet +bool IsRawRxPktInt(int eventnum) {return (eventnum == KUTRACE_RX_PKT);} +bool IsRawTxPktInt(int eventnum) {return (eventnum == KUTRACE_TX_PKT);} + +// User code sees a packet +bool IsUserRxPktInt(int eventnum) {return (eventnum == KUTRACE_RX_USER);} +bool IsUserTxPktInt(int eventnum) {return (eventnum == KUTRACE_TX_USER);} + +// Incoming RPC request/response. Prior RX_USER has set up pidtocorr[pid] +bool IsIncomingRpcReqResp(const OneSpan& event) { + return IsRpcReqRespInt(event.eventnum) && (event.arg != 0) && + (pidtocorr.find(event.pid) != pidtocorr.end()); +} + +// Outgoing RPC request/response. 
No pending pidcorr[pid] +bool IsOutgoingRpcReqResp(const OneSpan& event) { + return IsRpcReqRespInt(event.eventnum) && (event.arg != 0) && + (pidtocorr.find(event.pid) == pidtocorr.end()); +} + + +// Return true if the event is user message receive/send time and hash +inline bool IsUserMsgHashInt(int eventnum) { + return (KUTRACE_RX_USER <= eventnum) && (eventnum <= KUTRACE_TX_USER); +} + +// (3) +bool IsACall(const OneSpan& event) { + ////if (IsUserExec(event)) return false; + if (largest_non_pid < event.eventnum) return false; + if ((event.eventnum & call_mask) == 0) return false; + if ((event.eventnum & ret_mask) != 0) return false; + return true; +} + +// (3) +bool IsAReturn(const OneSpan& event) { + if (largest_non_pid < event.eventnum) return false; + if ((event.eventnum & call_mask) == 0) return false; + if ((event.eventnum & ret_mask) == 0) return false; + return true; +} + +bool IsACallOrReturn(const OneSpan& event) { + if (largest_non_pid < event.eventnum) return false; + if ((event.eventnum & call_mask) == 0) return false; + return true; +} + +// (3) +bool IsASyscallOrReturn(const OneSpan& event) { // Must be a call/ret already + if ((event.eventnum & call_mask) == KUTRACE_SYSCALL64) {return true;} + if ((event.eventnum & call_mask) == KUTRACE_SYSCALL32) {return true;} + return false; +} + +// (3) +bool IsOptimizedCall(const OneSpan& event) { // Must be a call already + return (event.duration > 0); +} + +// (3) +// These syscalls return a pid_t of a new runnable task +// Catches both optimized calls and standalone return +bool IsNewRunnablePidSyscall(const OneSpan& event) { + if (!IsACallOrReturn(event)) {return false;} + if (!IsASyscallOrReturn(event)) {return false;} + if (event.name == "clone") {return true;} + if (event.name == "/clone") {return true;} + if (event.name == "fork") {return true;} + if (event.name == "/fork") {return true;} + return false; +} + + +// (3) +bool IsSchedCallEvent(const OneSpan& event) { + return (event.eventnum == 
sched_syscall); +} +bool IsSchedCallEventnum(int eventnum) { + return (eventnum == sched_syscall); +} + +bool IsSchedReturnEvent(const OneSpan& event) { + return (event.eventnum == sched_sysret); +} +bool IsSchedReturnEventnum(int eventnum) { + return (eventnum == sched_sysret); +} + + +// (4) +bool IsAnIdle(const OneSpan& event) { + return (event.eventnum == event_idle); +} +bool IsAnIdlenum(int eventnum) { + return (eventnum == event_idle); +} + +// (4) These exclude the idle task +bool IsUserExecNonidle(const OneSpan& event) { + return ((event.eventnum & 0xF0000) == 0x10000) && !IsAnIdle(event); +} +bool IsUserExecNonidlenum(int eventnum) { + return ((eventnum & 0xF0000) == 0x10000) && !IsAnIdlenum(eventnum); +} + +// (Other) +// Events that contribute to CPU execution time, including idle +// Also including RPC begin/middle/end markers +bool IsExecContrib(const OneSpan& event) { + if (event.duration < 0) {return false;} + if (KUTRACE_TRAP <= event.eventnum) {return true;} + if (IsAnRpc(event)) {return true;} + return false; +} + +// Events that contribute to profile: waits and PC samples +bool IsProfContrib(const OneSpan& event) { + if (event.duration < 0) {return false;} + if ((KUTRACE_PC_U <= event.eventnum) && (event.eventnum <= KUTRACE_PC_K)) {return true;} + if ((KUTRACE_WAITA <= event.eventnum) && (event.eventnum <= KUTRACE_WAITZ)) {return true;} + return false; +} + + +// End Refinements ----------------------------------------- + +// Convert ten * lg(x) back into x +uint64 TenPow(uint8 xlg) { + int powertwo = xlg / 10; + int fraction = xlg % 10; + uint64 retval = 1llu << powertwo; + retval = (retval * kPowerTwoTenths[fraction]) + 0.5; + return retval; +} + +// Convert message byte length to approximate usec on the wire +inline uint64 msg_dur_usec(uint64 length) { + return (length * 8) / mbit_sec; +} + +// Convert message byte length to approximate multiple of 10 nsec on the wire +inline uint64 msg_dur_10nsec(uint64 length) { + return (length * 800) / 
mbit_sec; +} + +// Clean away any non-Ascii characters +void Clean(string* s) { + for (int i = 0; i < s->length(); ++i) { + char c = (*s)[i]; + if ((c < ' ') || ('~' < c)) { + (*s)[i] = '?'; + } + } +} + +string IntToString(int x) { + char temp[24]; + sprintf(temp, "%d", x); + return string(temp); +} + + +// A user-mode-execution event is the pid number plus 64K +int PidToEventnum(int pid) {return (pid & 0xFFFF) + 0x10000;} +int EventnumToPid(int eventnum) {return eventnum & 0xFFFF;} + +// Format a user thread name +string NameAppendPid(string name, int pid) { + if (pid == 0) {return kIdleName;} + return name + "." + IntToString(pid); +} + +// Initially empty stack of -idle- running on this thread +void InitPidState(PidState* t) { + t->ambiguous = 0; + t->rpcid = 0; + t->enqueue_num_pending = -1; // No active partial RPC + t->dequeue_num_pending = -1; // No active partial RPC + t->top = 0; + for (int i = 0; i < 5; ++i) { + t->eventnum[i] = event_idle; + t->name[i].clear(); + t->name[i] = string(kIdleName); + } +} + +void BrandNewPid(int newpid, const string& newname, PerPidState* perPidState) { + PidState temp; + InitPidState(&temp); + temp.top = 1; + temp.eventnum[0] = PidToEventnum(newpid); + temp.name[0] = newname; + // Use current name, not the possibly-bad one from rawtoevent + if (pidnames.find(newpid) != pidnames.end()) { + temp.name[0] = NameAppendPid(pidnames[newpid], newpid); + } + temp.eventnum[1] = sched_syscall; + temp.name[1] = "-sched-"; + (*perPidState)[newpid] = temp; +} + + + +// Initially -idle- running on this CPUg +void InitSpan(OneSpan* s, int i) { + memset(s, 0, sizeof(OneSpan)); // Takes care of commented zeros below + // s->start_ts = 0; + // s->duration = 0; + s->cpu = i; + s->pid = pid_idle; + // s->rpcid = 0; + s->eventnum = event_idle; + // s->arg = 0; // idle(0) regular; idle(1) low-power after mwait + // s->retval = 0; + // s->ipc = 0; + s->name = kIdleName; +} + +// Example: +// [ 49.7328170, 0.0000032, 0, 0, 0, 1519, 0, 0, 
"local_timer_vector"], + + + +// Spans are dumped as < ... > +// Events as [ ... ] +// Stacks as { ... } + +void DumpSpan(FILE* f, const char* label, const OneSpan* span) { + fprintf(f, "%s <%llu %llu %d %d %d %d %d %d %d %s>\n", + label, span->start_ts, span->duration, span->cpu, + span->pid, span->rpcid, span->eventnum, span->arg, span->retval, span->ipc, span->name.c_str()); +} + +void DumpSpanShort(FILE* f, const OneSpan* span) { + fprintf(f, "<%llu %llu ... %s> ", span->start_ts, span->duration, span->name.c_str()); +} + +void DumpStack(FILE* f, const char* label, const PidState* stack) { + fprintf(f, "%s [%d] %d %d {\n", label, stack->top, stack->ambiguous, stack->rpcid); + for (int i = 0; i < 5; ++i) { + fprintf(f, " [%d] %05x %s\n",i, stack->eventnum[i], stack->name[i].c_str()); + } + fprintf(f, "}\n"); +} + +void DumpStackShort(FILE* f, const PidState* stack) { + fprintf(f, "%d{", stack->top); + for (int i = 0; i <= stack->top; ++i) { + fprintf(f, "%s ", stack->name[i].c_str()); + } + fprintf(f, "}%s %d ", stack->ambiguous ? "ambig" : "", stack->rpcid); +} + +void DumpEvent(FILE* f, const char* label, const OneSpan& event) { + fprintf(f, "%s [%llu %llu %d %d %d %d %d %d %d %s]\n", + label, event.start_ts, event.duration, event.cpu, + event.pid, event.rpcid, event.eventnum, event.arg, event.retval, event.ipc, event.name.c_str()); +} + + +// Complain if more than 60 seconds +bool CHECK(const char* lbl, const OneSpan& item) { + bool error = false; + if (item.start_ts > kONE_HOUR) {error = true;} + if (item.duration > kONE_MINUTE_DURATION) {error = true;} + if (item.start_ts + item.duration > kONE_HOUR) {error = true;} + if (error) {fprintf(stderr, "%s ", lbl); DumpEvent(stderr, "****CHECK ", item);} + return error; +} + + + +string MaybeExtend(string s, int x) { + string maybe = "." + IntToString(x); + if (s.find(maybe) == string::npos) {return s + maybe;} + return s; +} + + + +// Return floor of log base2 of x, i.e. 
the number of bits-1 needed to hold x +int FloorLg(uint64 x) { + int lg = 0; + uint64 local_x = x; + if (local_x & 0xffffffff00000000LL) {lg += 32; local_x >>= 32;} + if (local_x & 0xffff0000LL) {lg += 16; local_x >>= 16;} + if (local_x & 0xff00L) {lg += 8; local_x >>= 8;} + if (local_x & 0xf0LL) {lg += 4; local_x >>= 4;} + if (local_x & 0xcLL) {lg += 2; local_x >>= 2;} + if (local_x & 0x2LL) {lg += 1; local_x >>= 1;} + return lg; +} + + +// Close off the current span +// Remember each user-mode PID end in priorPidEnd +void FinishSpan(const OneSpan& event, OneSpan* span) { + // Prior span duration is up until new event timestamp + span->duration = event.start_ts - span->start_ts; + + // CHECK NEGATIVE or TOO LRAGE + if (span->duration > kMAX_PLAUSIBLE_DURATION) { // 8 sec in 10 nsec increments + // Too big to be plausible with timer interrupts every 10 msec or less, + // Except wait_* events can be very long + // Force short positive + span->duration = 1; // 10 nsec + + if (event.start_ts < span->start_ts) { + // Force negative span to short positive + //fprintf(stderr, "BUG %llu .. %llu, duration negative, %lld0ns\n", + // span->start_ts, event.start_ts, span->duration); + } else { + // Force big positive span to medium positive + // except, ignore spans starting at 0 + if (span->start_ts != 0) { + fprintf(stderr, "BUG %llu .. 
%llu, duration too big %lld\n", + span->start_ts, event.start_ts, span->duration); + span->duration = 1000000; // 10 msec + } + } + } + +// Need to match up return value with call +// 5330796455 0 2049 3 4052 0 1 0 0 write (801) <=== arg0, no retval +// 5330796988 1 518 3 4052 0 3644 0 0 runnable (206) +// 5330797239 1 519 3 4052 0 1 0 0 sendipi (207) +// 5330797466 0 2561 3 4052 0 0 12 0 /write (a01) <=== retval: write(1)=12 + + // For IsOptimizedCall call/ret, entire span was already consumed + // For unoptimized, span's return value is in the ending event + if (IsAReturn(event)) { + span->retval = event.retval; + } + + // This span's ipc is in the ending event + span->ipc = event.ipc & ipc_mask; + + // Remember the end of last instance of each PID user-mode execution + if ((span->pid > 0) && (span->cpu >= 0) /* && IsUserExecnum(span->eventnum) */ ) { + priorPidEnd[span->pid] = span->start_ts + span->duration; + } +} + +// Open up a new span +void StartSpan(const OneSpan& event, OneSpan* span) { + span->start_ts = event.start_ts; + span->duration = 0; + span->cpu = event.cpu; + span->pid = event.pid; + span->rpcid = event.rpcid; + span->eventnum = event.eventnum; + span->arg = event.arg; + span->retval = event.retval; + span->ipc = 0; + span->name = event.name; + // Clean up any user span with leftover arg/ret + // Clean up uer-mode thread name + if (IsUserExecnum(event.eventnum)) { + span->arg = 0; + span->retval = 0; + } +} + +void MakeArcSpan(const OneSpan& event1, const OneSpan& event2, OneSpan* span) { + span->start_ts = event1.start_ts; + span->duration = event2.start_ts - event1.start_ts; + span->cpu = event1.cpu; + span->pid = event1.pid; + span->rpcid = event1.rpcid; + span->eventnum = ArcNum; + span->arg = event2.cpu; + span->retval = event2.pid; // Added 2020.08.20 + span->ipc = 0; + span->name = "-wakeup-"; +} + +// Waiting on reason c from event1 to event2. 
For PID or RPC, not on any CPU +// Make a waiting span: letter, time, dur, CPU -1, PID, RPC, name +// letter is 'a' through 'z' +// start time is from whenever the given PID was last running +// end time is from whenever the given PID runs again +// RPCid is from whenever the given PID runs again +// name is cpu, disk, net, etc. for generic waits +// name is disk_sdb1 if specific disk is available +// name is lock_ffffff:llll if specific lock source filename and line are available +// +// For PID and RPC only; not CPU-specific +void MakeWaitSpan(char letter, uint64 start_ts, uint64 end_ts, int pid, int rpcid, OneSpan* span) { + // Start 10 nsec late, so that HTML searches can find non-wait event + span->start_ts = start_ts + 1; // last execution of PID,or wakeup time + span->duration = end_ts - start_ts - 1; // next execution of PID + if (start_ts == end_ts) {span->duration = 0;} // We will throw this span away + span->cpu = -1; + span->pid = pid; + span->rpcid = rpcid; + if (letter < 'a') {letter = 'a';} + if ('z' < letter) {letter = 'z';} + span->eventnum = KUTRACE_WAITA + (letter - 'a'); + span->arg = 0; + span->retval = 0; + span->ipc = 0; + span->name = kWAIT_NAMES[letter - 'a']; +} + +// For PID only; not CPU- or RPC-specific +void MakeLockSpan(bool dots, uint64 start_ts, uint64 end_ts, int pid, + int lockhash, const string& lockname, OneSpan* span) { + span->start_ts = start_ts; + span->duration = end_ts - start_ts; + span->cpu = -1; + span->pid = pid; + span->rpcid = -1; + span->eventnum = dots ? 
KUTRACE_LOCK_TRY : KUTRACE_LOCK_HELD; + span->arg = lockhash; + span->retval = 0; + span->ipc = 0; + span->name = lockname; +} + +// To insert just after context switch back to a preempted in-progress RPC +// For all of CPU, PID, and RPC +void MakeRpcidMidSpan(uint64 start_ts, int cpu, int pid, int rpcid, OneSpan* span) { + char rpc_name[64]; + sprintf(rpc_name, "%s.%d", methodnames[rpcid].c_str(), rpcid); + + span->start_ts = start_ts; + span->duration = 1; + span->cpu = cpu; + span->pid = pid; + span->rpcid = rpcid; + span->eventnum = KUTRACE_RPCIDMID; + span->arg = rpcid; + span->retval = 0; + span->ipc = 0; + span->name = string(rpc_name); +} + +// To insert just after dequeuing an RPC +// For RPC only; not CPU- or PID-specific +void MakeQueuedSpan(uint64 start_ts, uint64 end_ts, int queue_num, int rpcid, OneSpan* span) { + span->start_ts = start_ts; + span->duration = end_ts - start_ts; + span->cpu = -1; + span->pid = -1; + span->rpcid = rpcid; + span->eventnum = KUTRACE_ENQUEUE; + span->arg = queue_num; + span->retval = 0; + span->ipc = 0; + span->name = string(queuenames[queue_num]); +} + + +// If we turned the current span idle into c_exit, now put it back +// OBSOLETE. unused +void CexitBackToIdle(OneSpan* span) { + if (span->eventnum != event_c_exit) {return;} + span->eventnum = event_idle; + span->name = string(kIdleName); +//fprintf(stdout, "CexitBackToIdle at %llu\n", span->start_ts); +} + +// Make sure bugs about renaming the idle pid are gone. 
DEFUNCT +void CheckSpan(const char* label, const CPUState* thiscpu) { + bool fail = false; + const OneSpan* span = &thiscpu->cur_span; + if ((span->name == string(kIdleName)) && + (span->eventnum != event_idle)) {fail = true;} + for (int i = 0; i < 5; ++i) { + if ((thiscpu->cpu_stack.name[i] == string(kIdleName)) && + (thiscpu->cpu_stack.eventnum[i] != event_idle)) {fail = true;} + } + if (fail) { + fprintf(stderr, "\nCheckSpan failed ==================================\n"); + fprintf(stdout, "\nCheckSpan failed ==================================\n"); + DumpSpan(stdout, label, span); + DumpStack(stdout, label, &thiscpu->cpu_stack); + } +} + +// Write the current timespan and start a new one +// Change time from multiples of 10ns to seconds +// ts dur CPU tid rpc event arg0 ret name +// Make time stamp ts 12.8 so fixed width for later text sort +void WriteSpanJson2(FILE* f, const OneSpan* span) { + if (span->start_ts == 0) {return;} // Front of trace for each CPU + if (span->duration > kMAX_PLAUSIBLE_DURATION) {return;} // More than 8 sec in 10ns increments + + // Output + // time dur cpu pid rpcid event arg retval ipc name + // Change time from multiples of 10 nsec to seconds and fraction + double ts_sec = span->start_ts / 100000000.0; + double dur_sec = span->duration / 100000000.0; +//CHECK("f", *span); + // ts dur cpu pid rpc event arg ret ipc name + fprintf(f, "[%12.8f, %10.8f, %d, %d, %d, %d, %d, %d, %d, \"%s\"],", + ts_sec, dur_sec, span->cpu, + span->pid, span->rpcid, span->eventnum, + span->arg, span->retval, span->ipc, span->name.c_str()); + ++span_count; + fprintf(f, "\n"); + + // Stastics + if (IsUserExecNonidlenum(span->eventnum)) { + total_usermode += dur_sec; + } else if (IsAnIdlenum(span->eventnum)) { + total_idle += dur_sec; + } else if (IsKernelmodenum(span->eventnum)) { + total_kernelmode += dur_sec; + } else { + total_other += dur_sec; + } +} + +void WriteSpanJson(FILE* f, const CPUState* thiscpu) { + WriteSpanJson2(f, &thiscpu->cur_span); +} + 
+// Write a point event, so they aren't lost +// Change time from multiples of 10 nsec to seconds and fraction +void WriteEventJson(FILE* f, const OneSpan* event) { + double ts_sec = event->start_ts / 100000000.0; + double dur_sec = event->duration / 100000000.0; +//CHECK("g", *event); + // ts dur cpu pid rpc event arg ret ipc name + fprintf(f, "[%12.8f, %10.8f, %d, %d, %d, %d, %d, %d, %d, \"%s\"],\n", + ts_sec, dur_sec, event->cpu, + event->pid, event->rpcid, event->eventnum, + event->arg, event->retval, event->ipc, event->name.c_str()); + ++span_count; +} + +// Open the json variable and give inital values +void InitialJson(FILE* f, const char* label, const char* basetime) { + // Generate a pseudo-ranndom trace ID number, in case we need to distinguish + // saved state for different traces beyond their basetime + unsigned int randomid = (time(NULL) ^ (getpid() * 12345678)) & 0x7FFFFFFF; + + // Leading spaces are to keep this all in front and in order after text sort + fprintf(f, " {\n"); + fprintf(f, " \"Comment\" : \"V2 with IPC field\",\n"); + fprintf(f, " \"axisLabelX\" : \"Time (sec)\",\n"); + fprintf(f, " \"axisLabelY\" : \"CPU Number\",\n"); + fprintf(f, " \"flags\" : %d,\n", incoming_flags); + fprintf(f, " \"randomid\" : %d,\n", randomid); + fprintf(f, " \"shortUnitsX\" : \"s\",\n"); + fprintf(f, " \"shortMulX\" : 1,\n"); + fprintf(f, " \"thousandsX\" : 1000,\n"); + fprintf(f, " \"title\" : \"%s\",\n", label); + fprintf(f, " \"tracebase\" : \"%s\",\n", basetime); + fprintf(f, " \"version\" : %d,\n", incoming_version); + if (!kernel_version.empty()) { + Clean(&kernel_version); + fprintf(f, " \"kernelVersion\" : \"%s\",\n", kernel_version.c_str()); + } + if (!cpu_model_name.empty()) { + Clean(&cpu_model_name); + fprintf(f, " \"cpuModelName\" : \"%s\",\n", cpu_model_name.c_str()); + } + if (!host_name.empty()) { + Clean(&host_name); + fprintf(f, " \"hostName\" : \"%s\",\n", host_name.c_str()); + } + + fprintf(f, "\"events\" : [\n"); +} + +// Add dummy entry 
// that sorts last, then close the events array and top-level json
// The dummy row carries timestamp 999.0 and no trailing comma, so it is a
// valid final element of the "events" array.
void FinalJson(FILE* f) {
  static const char* const kTrailer[] = {
    "[999.0, 0.0, 0, 0, 0, 0, 0, 0, 0, \"\"]\n",	// no comma
    "]}\n",
  };
  const int kTrailerLines = sizeof(kTrailer) / sizeof(kTrailer[0]);
  for (int line = 0; line < kTrailerLines; ++line) {
    fputs(kTrailer[line], f);
  }
}

// Design for push/pop of nested kernel routines
// Each per-CPU stack, thiscpu->cpu_stack, keeps track of routines that have been
// entered but not yet exited. In general, entry/exit events in the trace can be
// missing or unbalanced. At the beginning of a trace, there can be early exit
// events with no prior entry. Some events exit abnormally with no explicit
// exit event. Events are somewhat constrained in their nesting, which helps
// track what must be going on. NestLevel reflects these constraints.
// If A is on the stack and a call to B occurs, the NestLevel of B should be
// greater than the NestLevel of A. If not, a synthetic pop of A is inserted.
// Conversely, if A is on the stack and a return from B occurs, a synthetic
// call to B is inserted. If A and B (top) are stacked and a return from A occurs,
// a synthetic return from B is inserted.
//
// In particular, there is an ambiguity for syscalls. Some return normally with
// a sysret event. Others exit directly through the scheduler with no sysret.
// Others block, go through the scheduler, then later resume and exit normally
// with a sysret event (or block multiple times before normal exit). There is
// not enough information in the raw trace to distinguish immediate return from
// blocking with later return. With process A running, we expect the sequence
//   syscall C, sysret C
// but if instead we get
//   syscall C, sched, ctxswitch to B, /sched
// we leave the syscall on the stack but mark the stack ambiguous. This stack
// is saved for PID=A.
// Later, when we encounter a context switch back to process A,
//   ..., sched, ctxswitch to A, /sched, N
// the A stack is restored at ctxswitch, but we don't know whether exiting the
// scheduler goes to suspended syscall C or all the way out to user code A.
//
// We resolve the ambiguity by setting up the span after sched, but don't commit to
// what is running in that span until we reach the next event N. If N is something
// that can only be done in kernel mode (sched, runnable, kernel PC sample, user PID,
// sendIPI, mwait, freq, sysret) we remove the ambiguous mark and set the span to
// the syscall. If N is something that can only be done in user mode (user PC
// sample, syscall) we remove the ambiguous mark, pop the stack, and set the
// span to user process A. If neither occurs (e.g. a disk interrupt), we leave
// the stack ambiguous and the span keeps its default, the top of the stack
// (the suspended syscall C); see FixupAmbiguousSpan. (We could
// conceivably create a new span type "ambiguous" and perhaps display it as gray.)
// In this last case, the ambiguity will eventually be resolved later. If we
// had a multi-pass eventtospan design, we could go back and fix up the ambiguous
// spans. But this is really getting to be an edge case.

// Nesting levels are user:0, syscall:1, trap:2, IRQ:3, sched_syscall:4.
+// It is only legal to call to a numerically larger nesting level +// Note that we can skip levels, so these are not exactly stack depth +int NestLevel(int eventnum) { + if (largest_non_pid < eventnum) {return 0;} // User-mode pid + if (eventnum == sched_syscall) {return 4;} // Enter the scheduler + // must precede syscall below + if ((eventnum & call_ret_mask) == KUTRACE_SYSCALL64) {return 1;} // syscall/ret + if ((eventnum & call_ret_mask) == KUTRACE_SYSCALL32) {return 1;} // syscall/ret + if ((eventnum & type_mask) == KUTRACE_TRAP) {return 2;} // trap + if ((eventnum & type_mask) == KUTRACE_IRQ) {return 3;} // interrupt + return 1; // error; pretend it is a syscall +} + +// This deals with mis-nested call +void AdjustStackForPush(const OneSpan& event, CPUState* thiscpu) { + while (NestLevel(event.eventnum) <= + NestLevel(thiscpu->cpu_stack.eventnum[thiscpu->cpu_stack.top])) { +fprintf(stdout,"AdjustStackForPush FAIL\n"); + // Insert dummy returns, i.e. pop, until the call is legal or we are at user-mode level + if (thiscpu->cpu_stack.top == 0) {break;} +if (verbose) fprintf(stdout, "-%d dummy return from %s\n", +event.cpu, thiscpu->cpu_stack.name[thiscpu->cpu_stack.top].c_str()); + --thiscpu->cpu_stack.top; + } +} + +// This deals with unbalanced return +void AdjustStackForPop(const OneSpan& event, CPUState* thiscpu) { + if (thiscpu->cpu_stack.top == 0) { +fprintf(stdout,"AdjustStackForPop FAIL\n"); + // Trying to return above user mode. Push a dummy syscall +if (verbose) fprintf(stdout, "+%d dummy call to %s\n", event.cpu, event.name.c_str()); + ++thiscpu->cpu_stack.top; + thiscpu->cpu_stack.eventnum[thiscpu->cpu_stack.top] = dummy_syscall; + thiscpu->cpu_stack.name[thiscpu->cpu_stack.top] = string("-dummy-"); + } + // If returning from something lower nesting than top of stack, + // pop the stack for a match. 
+ int matching_call = event.eventnum & ~ret_mask; // Turn off the return bit + while (NestLevel(matching_call) < + NestLevel(thiscpu->cpu_stack.eventnum[thiscpu->cpu_stack.top])) { +fprintf(stdout,"AdjustStackForPop FAIL\n"); + // Insert dummy returns, i.e. pop, until the call is legal or we are at user-mode level + if (thiscpu->cpu_stack.top == 1) {break;} +if (verbose) fprintf(stdout, "-%d dummy return from %s\n", +event.cpu, thiscpu->cpu_stack.name[thiscpu->cpu_stack.top].c_str()); + --thiscpu->cpu_stack.top; + } +} + +// Add the pid# to the end of user-mode name, if not already there +string AppendPid(const string& name, uint64 pid) { + char pidnum_temp[16]; + sprintf(pidnum_temp, ".%lld", pid & 0xffff); + if (strstr(name.c_str(), pidnum_temp) == NULL) { + return name + string(pidnum_temp); + } + return name; +} + +string EventNamePlusPid(const OneSpan& event) { + return AppendPid(event.name, event.pid); +} + +void DumpShort(FILE* f, const CPUState* thiscpu) { + fprintf(f, "\t"); + DumpStackShort(f, &thiscpu->cpu_stack); + fprintf(f, "\t"); + DumpSpanShort(f, &thiscpu->cur_span); + fprintf(f, "\n"); +} + +// Insert wait_* span for reason that we were waiting +void WaitBeforeWakeup(const OneSpan& event, CPUState* cpustate, PerPidState* perPidState) { + CPUState* thiscpu = &cpustate[event.cpu]; + int target_pid = event.arg; + + // The wakeup has a target PID. We keep a list of the most recent user-mode event + // mentioning that PID, if any. The time from last mention to now is the + // waiting time; the current wakeup event signals the end of that waiting. + // The top of the per-CPU call stack says what kernel routine is doing the wakeup. + // TRICKY: The target PID might actually be running or in the scheduler right now, + // about to be context switched out. Inthat case, avoid any before-wakeup event. + + // There is no priorPidEvent at the beginning of a trace. 
+ if (priorPidEvent.find(target_pid) == priorPidEvent.end()) {return;} + + // If the target PID is currently executing, do not generate a wait + if (pidRunning.find(target_pid) != pidRunning.end()) {return;} + + OneSpan& old_event = priorPidEvent[target_pid]; + const PidState* stack = &thiscpu->cpu_stack; + + // Create wait_* events + // Also see soft_irq_name in rawtoevent.cc + char letter = ' '; // Default = unknown reason for waiting + if (stack->name[stack->top] == "local_timer_vector") { // timer + letter = 't'; // timer + } else if (stack->name[stack->top] == "arch_timer") { // Rpi time + letter = 't'; // timer + } else if (stack->name[stack->top] == "page_fault") { // memory + letter = 'm'; // memory + } else if (stack->name[stack->top] == "mmap") { + letter = 'm'; // memory + } else if (stack->name[stack->top] == "munmap") { + letter = 'm'; // memory + } else if (stack->name[stack->top] == "mprotect") { + letter = 'm'; // memory + } else if (stack->name[stack->top] == "futex") { // lock + letter = 'l'; // lock + } else if (stack->name[stack->top] == "writev") { // pipe + letter = 'p'; // pipe + } else if (stack->name[stack->top] == "write") { + letter = 'p'; // pipe + } else if (stack->name[stack->top] == "sendto") { + letter = 'p'; // pipe + } else if (stack->name[stack->top].substr(0,7) == "kworker") { + letter = 'p'; // pipe + } else if (stack->name[stack->top] == "BH:hi") { // tasklet + letter = 'k'; // high prio tasklet or unknown BH fragment + } else if (stack->name[stack->top] == "BH:timer") { // time + letter = 't'; // timer + } else if (stack->name[stack->top] == "BH:tx") { // network + letter = 'n'; // network + } else if (stack->name[stack->top] == "BH:rx") { + letter = 'n'; // network + } else if (stack->name[stack->top] == "BH:block") { // disk + letter = 'd'; // disk/SSD + } else if (stack->name[stack->top] == "BH:irq_p") { + letter = 'd'; // disk/SSD (iopoll) + } else if (stack->name[stack->top] == "syncfs") { + letter = 'd'; // disk/SSD + } 
else if (stack->name[stack->top] == "BH:taskl") { + letter = 'k'; // normal tasklet + } else if (stack->name[stack->top] == "BH:sched") { // sched + letter = 's'; // scheduler (load balancing) + } else if (stack->name[stack->top] == "BH:hrtim") { + letter = 't'; // timer + } else if (stack->name[stack->top] == "BH:rcu") { + letter = 't'; // read-copy-update release code + } + + if ((letter != ' ')) { + // Make a wait_* display span + OneSpan temp_span = thiscpu->cur_span; // Save + MakeWaitSpan(letter, priorPidEnd[target_pid], + event.start_ts, target_pid, old_event.rpcid, &thiscpu->cur_span); + + // Don't clutter if the waiting is short (say < 10 usec) + if (thiscpu->cur_span.duration >= kMIN_WAIT_DURATION) { + WriteSpanJson(stdout, thiscpu); // Standalone wait_cpu span + } + thiscpu->cur_span = temp_span; // Restore + } +} + +void WaitAfterWakeup(const OneSpan& event, CPUState* cpustate, PerPidState* perpidstate) { + CPUState* thiscpu = &cpustate[event.cpu]; + int target_pid = event.arg; +} + +void DoWakeup(const OneSpan& event, CPUState* cpustate, PerPidState* perpidstate) { + CPUState* thiscpu = &cpustate[event.cpu]; + int target_pid = event.arg; + // Remember the wakeup + pendingWakeup[target_pid] = event; + + // Any subsequent waiting will be for CPU, starting at this wakeup + priorPidEnd[target_pid] = event.start_ts + event.duration; +} + +void SwapStacks(int oldpid, int newpid, const string& name, CPUState* thiscpu, PerPidState* perpidstate) { + if (oldpid == newpid) {return;} + + // Swap out the old thread's stack, but don't change the idle stack + if (oldpid != 0) { + (*perpidstate)[oldpid] = thiscpu->cpu_stack; + } +if (verbose) { +fprintf(stdout, "SwapStacks old %d: ", oldpid); +DumpStackShort(stdout, &thiscpu->cpu_stack); +} + if (perpidstate->find(newpid) == perpidstate->end()) { + // Switching to a thread we haven't seen before. Should only happen at trace start. 
+ // Create a two-item stack of just user-mode pid and sched_syscall + BrandNewPid(newpid, name, perpidstate); + } + + thiscpu->cpu_stack = (*perpidstate)[newpid]; + +if (verbose) { + fprintf(stdout, "new %d: ", newpid); + DumpStackShort(stdout, &thiscpu->cpu_stack); + fprintf(stdout, "\n"); + } +} + +// An ambiguous call stack might be running in the current top or might be +// running in user mode. We look at the terminating event of the current +// CPU span to try to resolve which it is. +void FixupAmbiguousSpan(const OneSpan& event, + CPUState* thiscpu) { + if (thiscpu->cpu_stack.ambiguous == 0) {return;} + // If running above the ambiguous stack entry, nothing to do + if (thiscpu->cpu_stack.ambiguous < thiscpu->cpu_stack.top) {return;} + +if (verbose) { +DumpStackShort(stdout, &thiscpu->cpu_stack); +fprintf(stdout, " ===ambiguous at %s :\n", event.name.c_str()); +} + if (OnlyInKernelMode(event)) { + thiscpu->cpu_stack.ambiguous = 0; + // Span was set to top of stack, so we are all done +if (verbose) fprintf(stdout, "=== resolved kernel\n"); + return; + } + if (OnlyInUserMode(event)) { + thiscpu->cpu_stack.ambiguous = 0; + // Span was set to top of stack, but we need to pop back to user mode + thiscpu->cpu_stack.top = 0; + thiscpu->cur_span.eventnum = thiscpu->cpu_stack.eventnum[0]; + thiscpu->cur_span.name = thiscpu->cpu_stack.name[0]; +if (verbose) fprintf(stdout, "=== resolved user\n"); + return; + } + // If neither, leave ambiguous. 
Span shows top of stack +if (verbose) fprintf(stdout, "=== unresolved\n"); +} + +uint64 PackLock(int lockhash, int pid) { + uint64 retval = pid & 0x00000000ffffffffllu; + retval |= (lockhash & 0x00000000ffffffffllu) << 32; + return retval; +} + +void WriteFreqSpan(uint64 start_ts, uint64 end_ts, uint64 cpu, uint64 freq) { + OneSpan event; + event.start_ts = start_ts; + event.duration = end_ts - start_ts; + event.cpu = cpu; + event.pid = 0; + event.rpcid = 0; + event.eventnum = KUTRACE_PSTATE; + event.arg = freq; + event.retval = 0; + event.ipc = 0; + event.name = string("freq"); + WriteEventJson(stdout, &event); +} + + +// +// Each call will update the current duration for this CPU and emit it, except +// A few events do not close the current span: +// PC samples, pstate changes (freq), point events not below +// A few events interrupt the current span but then leave it continuing with new ts_start: +// RPC events, mark_a/b/c/d, c_exit, mwait +// +// +void ProcessEvent(const OneSpan& event, + CPUState* cpustate, + PerPidState* perpidstate) { + CPUState* thiscpu = &cpustate[event.cpu]; + + if (verbose) { + fprintf(stdout, "zz[%d] %llu %llu %03x(%d)=%d %s ", + event.cpu, event.start_ts, event.duration, + event.eventnum, event.arg, event.retval, event.name.c_str()); + DumpEvent(stdout, "", event); + DumpShort(stdout, &cpustate[event.cpu]); + } + + // Remember last instance of each PID + // We want to do this for the events that finish execution spans + if ((event.pid > 0) && (event.cpu >= 0)) { + priorPidEvent[event.pid] = event; +//fprintf(stdout, "~~ ~~ priorPidEvent[%d] = %llu\n", event.pid, event.start_ts); + } + + // Remember that there is no pending context switch + if (IsSchedCallEvent(event) || IsSchedReturnEvent(event)) { + thiscpu->ctx_switch_ts = 0; +//fprintf(stdout, "~~ ~~ ctx_switch_ts[%d] = 0\n", event.cpu); + } + + // Keep track of which PIDs are currently running + if (IsSchedReturnEvent(event)) { + pidRunning.erase(thiscpu->oldpid); + 
pidRunning[thiscpu->newpid] = true; + + // Restore nonzero rpcid for a preempted task that we are returning to + if (thiscpu->cpu_stack.rpcid != 0) { + OneSpan temp_span; + MakeRpcidMidSpan(event.start_ts, event.cpu, event.pid, thiscpu->cpu_stack.rpcid, &temp_span); + WriteSpanJson2(stdout, &temp_span); + } + } + + // This event may reveal whether the current span is executing in user or kernel mode + FixupAmbiguousSpan(event, thiscpu); + + // Normally, rawtoevent propogates the RPCID from these events to subsequent events. + // Unfortunately, this doesn't track across context switches when an active RPC is + // preempted and then later resumed. + // So instead, we completely reconstruct rpcid here in eventtospan3, usin gnot event.rpcid + // but thiscpu->rpcid, which is saved and restored across context switches. + // An RPC event sets thiscpu->rpcid, and thiscpu->rpcid overrides event.rpc otherwise + + if (IsAnRpc(event)) { + // Start a new span here so we don't assign RPC time to previous work + if (thiscpu->valid_span) { + // Prior span stops here --------^^^^^^^^ + FinishSpan(event, &thiscpu->cur_span); + WriteSpanJson(stdout, thiscpu); // Previous span + } + WriteEventJson(stdout, &event); // Standalone mark + +// This is looking just like IsAMark +// Just update the still-open span start + // Continue what we were doing, with new start_ts + thiscpu->cur_span.start_ts = event.start_ts + event.duration; + // Update CPU and current span's rpcid + thiscpu->cpu_stack.rpcid = event.arg; // 2021.02.05 + thiscpu->cur_span.rpcid = event.arg; + return; + } + + + if (IsAContextSwitch(event)) { + // Context switch + // Current user-mode pid, seen at context switch and at front of each + // trace block. + // We expect this to match the [0] entry of the cpu's thread stack, + // but it might not at the very front of a trace or at the oldest blocks + // of a wraparound trace. When that happens, overwrite stack[0]. + // If the stack top is 0, also update the current span. 
+ + // Remember this pending context switch time, in case /sched is missing + thiscpu->ctx_switch_ts = event.start_ts; +//fprintf(stdout, "~~ ~~ ctx_switch_ts[%d] = %llu\n", event.cpu, event.start_ts); + + // Mark the old stack ambiguous if inside kernel code + thiscpu->cpu_stack.ambiguous = 0; +if (verbose) DumpStackShort(stdout, &thiscpu->cpu_stack); + if (2 <= thiscpu->cpu_stack.top) { + // Scheduler entered from within a kernel routine + // stack such as: 2{mystery25.3950 read -sched- }0 + // Record the subscript of the ambiguous stack entry just before -sched- +if (verbose) fprintf(stdout, " ===marking old stack ambiguous at ctx_switch to %s\n", event.name.c_str()); + thiscpu->cpu_stack.ambiguous = thiscpu->cpu_stack.top - 1; + } + + thiscpu->oldpid = EventnumToPid(thiscpu->cpu_stack.eventnum[0]); + thiscpu->newpid = event.pid; + + // + // Swap out the old thread's stack and swap in the new thread's stack + // + SwapStacks(thiscpu->oldpid, thiscpu->newpid, event.name, thiscpu, perpidstate); + + // We came in with the sched span started. just leave it alone here. 
+ +#if 1 + // Turn context switch event into a user-mode-execution event at top of stack + thiscpu->cpu_stack.eventnum[0] = PidToEventnum(event.pid); + ////sthiscpu->cpu_stack.name[0] = EventNamePlusPid(event); + thiscpu->cpu_stack.name[0] = NameAppendPid(pidnames[event.pid], event.pid); + + // And also update the current span if we are at top + if (thiscpu->cpu_stack.top == 0) { + // Update user-mode-execution event at top of stack + //VERYTEMP + bool xx = false && (thiscpu->cur_span.eventnum != PidToEventnum(event.pid)); + if (xx) { + fprintf(stderr, "oldevent=%05x newevent=%05x\n", thiscpu->cur_span.eventnum, PidToEventnum(event.pid)); + } + StartSpan(event, &thiscpu->cur_span); // userexec span start --------vvvvvvvv + thiscpu->valid_span = true; + thiscpu->cur_span.eventnum = thiscpu->cpu_stack.eventnum[thiscpu->cpu_stack.top]; + thiscpu->cur_span.name = thiscpu->cpu_stack.name[thiscpu->cpu_stack.top]; + + if (xx) { + DumpEvent(stderr, "ctx", event); + DumpSpan(stderr, " ctx", &thiscpu->cur_span); + DumpStack(stderr, "ctx", &thiscpu->cpu_stack); + } + } +#endif + + return; + } + + // If we have a PC sample for this CPU, assign it a duration up to the following sample + // We do this by buffering one sample and emitting it later + // Do not touch current span + if (IsAPcSample(event)) { + // Sample goes back to prior sample, if any + if (thiscpu->prior_pc_samp_ts != 0) { + // event is const so we can't modify it + OneSpan event1 = event; + event1.start_ts = thiscpu->prior_pc_samp_ts; + event1.duration = event.start_ts - event1.start_ts; + WriteEventJson(stdout, &event1); + } + thiscpu->prior_pc_samp_ts = event.start_ts; + return; + } + + // Similar for pstate (clock speed) + // Do not touch current span + if (IsAPstate(event)) { + // PSTATE sampled freq goes back to prior pstate, if any + // PSTATE2 notified freq goes forward to next pstate2, if any + // PSTATE: prior_pstate_ts..now = the current freq + // PSTATE2: prior_pstate_ts..now = the prior frequency + 
// At end of trace, we will flush out the last span + + if (thiscpu->prior_pstate_ts != 0) { + uint64 prior_ts = thiscpu->prior_pstate_ts; + uint64 this_freq = event.arg; + uint64 prior_freq = thiscpu->prior_pstate_freq; + uint64 freq = (event.eventnum == KUTRACE_PSTATE) ? this_freq : prior_freq; + if (is_rpi) { + // Reflect frequency on all CPUs + for (int cpu = 0; cpu <= max_cpu_seen; ++cpu) { + WriteFreqSpan(prior_ts, event.start_ts, cpu, freq); + } + } else { + WriteFreqSpan(prior_ts, event.start_ts, event.cpu, freq); + } + } + + // Update one or all CPUs + if (is_rpi) { + // Change all CPU frequencies + for (int cpu = 0; cpu <= max_cpu_seen; ++cpu) { + cpustate[cpu].prior_pstate_ts = event.start_ts; + cpustate[cpu].prior_pstate_freq = event.arg; + } + } else { + thiscpu->prior_pstate_ts = event.start_ts; + thiscpu->prior_pstate_freq = event.arg; + } + + return; + } + + // If we have a non-KUTRACE_USERPID point event, do not affect the current span. + // Just write the point event now, leaving the current span open to be + // completed at a subsequent event + // + // 2019.05.14. Except go ahead and break spans at marks and mwait + if (IsAMark(event) || IsAnMwait(event)) { + if (thiscpu->valid_span) { + // Prior span stops here --------^^^^^^^^ + FinishSpan(event, &thiscpu->cur_span); + WriteSpanJson(stdout, thiscpu); // Previous span + } + WriteEventJson(stdout, &event); // Standalone mark/mwait/etc. 
+ // Continue what we were doing, with new start_ts + thiscpu->cur_span.start_ts = event.start_ts + event.duration; + + // Remember any mwait by cpu, for drawing c-state exit sine wave + if (IsAnMwait(event)) { + thiscpu->mwait_pending = event.arg; + thiscpu->cur_span.arg = 1; // Mark continuing idle as low-power + thiscpu->cur_span.name = kIdlelpName; + } + + return; + + // Do not touch current span + // userpid, rpc, runnable, ipi, [mwait], pstate, [mark], lock, pc, wait + } else if (IsAPointEvent(event)) { // Marks do not end up here due to test just above + WriteEventJson(stdout, &event); // Standalone point event +//VERYTEMP +//if (IsAnRpcMsg(event)) {DumpEvent(stderr, "rpcmsg:", event);} + +/***** +Drawing lock-held lines +Normal long case: process A fails to get a lock, spins/waits until process B frees the lock and wakes up A, A exits wait and tries again + Draw B lock_line from A fail to B free +Normal short case: process A fails to get a lock, spins until process B frees the lock BUT no wakeup, A exits spin and tries again + Draw A lock_dots from A fail to A fail/acq (we don't know which process held the lock, which appears uncontended to B) +Abnormal case: Old file has no process B KUTRACE_LOCKWAKEUP but has B set-runnable A arc and A is waiting on a lock. + Draw lock_line from A fail to B beginning of futex that contains set-runnable +Multiple waiters: process A' fails to get lock with A already waiting. Do not change start ts + +Prior Event Action +none LOCKNOACQUIRE set LockContend{ts-1us, -1} unknown lock holder. silent acquire +LOCKNOACQUIRE LOCKNOACQUIRE -- normal. multiple tries +LOCKACQUIRE LOCKNOACQUIRE -- normal. acq pid holds lock +LOCKWAKEUP LOCKNOACQUIRE BUG + +none LOCKACQUIRE set LockContend{ts, pid} normal. 
continuing waiters +LOCKNOACQUIRE LOCKACQUIRE line/dots-1us, set LockContend{ts, pid} silent release +LOCKACQUIRE LOCKACQUIRE line/dots-1us, set LockContend{ts, pid} silent release +LOCKWAKEUP LOCKACQUIRE BUG + +none LOCKWAKEUP line before silent acquire, sometime before by us +LOCKNOACQUIRE LOCKWAKEUP line, clear LockContend normal. +LOCKACQUIRE LOCKWAKEUP line, clear LockContend normal. +LOCKWAKEUP LOCKWAKEUP BUG + +arg0 = lock hash +process A contended +at KUTRACE_LOCKNOACQUIRE, if hash unseen, remember ts,A,hash. Start of known lock held but by unknown PID holding it + +at KUTRACE_LOCKNOACQUIRE, if hash already pending by our thread, there was a hidden wakeup by unknown PID + emit lock_dots from remembered ts to current ts - 1us, A, hash + reset prior ts to current ts for new contended span +process A' contended +at KUTRACE_LOCKNOACQUIRE, if hash already pending by other thread fail, add prior start_ts,A',hash + +Process A contended acquire +at KUTRACE_LOCKACQUIRE, if hash already noacq pending by our thread, there was a hidden wakeup by unknown PID + emit lock_dots from remembered ts to current ts - 1us, A, hash + remove current pid from waiters + remember the acquire time -- it *is* contended or we would not have emitted an acquire +process B contended release +at KUTRACE_LOCKWAKEUP, emit lock_line from start_ts - 1us to current ts, B, hash + remove all waiters for hash + +releasing process holds the lock back to earliest of acquire or noacquire. 
If notne of those, nominal 1us +acquiring process may encounter a hidden wakeup after its own fail, in which case emit dots + + +// Contended lockpending +// Really just map where struct has ts and list of waiting pids +*****/ + +// Things that can happen in the trace +// 1) CPU A releases lock, spinning CPU B acquires it immediately, produces ACQ trace entry, then A produces REL entry 10-20ns later +// 2) CPU A releases lock, spinning CPU B acquires it immediately, produces ACQ trace entry, A produces REL entry with equal time +// 3) CPU A releases lock, just as spinning CPU B fails to acquire it, A produces REL entry, B then produces TRY entry 30ns later +// +// In these cases, we actually want to process the acq after the rel +// In the meantime, suppressing lock_held lines < 1/2 us helps +// +// 4) CPU A releases lock, spinning CPU B acquires it immediately, but interrupt delays recording ACQ entry, +// meanwhile CPU C fails to acquire with unknown holder of the lock +// + + + // Point event + // Remember any failed lock acquire event if nothing is pending + if (event.eventnum == KUTRACE_LOCKNOACQUIRE) { + // Remember that this PID is trying to get this lock + int lockhash = event.arg; + uint64 subscr = PackLock(lockhash, event.pid); + LockContend lockcontend; + lockcontend.start_ts = event.start_ts; + lockcontend.pid = event.pid; + lockcontend.eventnum = event.eventnum; + lockpending[subscr] = lockcontend; + } + + // Process any successful lock acquire event + if (event.eventnum == KUTRACE_LOCKACQUIRE) { + int lockhash = event.arg; + uint64 subscr = PackLock(lockhash, event.pid); + // If prior try, draw dots for this PID trying to get this lock + if ((lockpending.find(subscr) != lockpending.end()) && + (lockpending[subscr].eventnum == KUTRACE_LOCKNOACQUIRE)) { + uint64 start_ts = lockpending[subscr].start_ts; + uint64 end_ts = event.start_ts - 1; // Stop 10 ns early + // Ignore contention < 250ns + if (25 <= (end_ts - start_ts)) { + bool dots = true; + string 
lockname = "~" + event.name.substr(4); // Remove try_ acq_ rel_ + OneSpan temp_span; + MakeLockSpan(dots, start_ts, end_ts, event.pid, + lockhash, lockname, &temp_span); + WriteSpanJson2(stdout, &temp_span); + } + } + // Remember that this PID now holds this lock + LockContend lockcontend; + lockcontend.start_ts = event.start_ts; + lockcontend.pid = event.pid; + lockcontend.eventnum = event.eventnum ; + lockpending[subscr] = lockcontend; + } + + // Process any lock wakeup (release) event + if (event.eventnum == KUTRACE_LOCKWAKEUP) { + int lockhash = event.arg; + uint64 subscr = PackLock(lockhash, event.pid); + // If prior acq, draw line for this PID holding this lock + if ((lockpending.find(subscr) != lockpending.end()) && + (lockpending[subscr].eventnum == KUTRACE_LOCKACQUIRE)) { + uint64 start_ts = lockpending[subscr].start_ts; + uint64 end_ts = event.start_ts - 1; // Stop 10 ns early + // Ignore contention < 250ns + if (25 <= (end_ts - start_ts)) { + bool dots = false; + string lockname = "=" + event.name.substr(4); // Remove try_ acq_ rel_ + OneSpan temp_span; + MakeLockSpan(dots, start_ts, end_ts, event.pid, + lockhash, lockname, &temp_span); + WriteSpanJson2(stdout, &temp_span); + } + } + // This PID is no longer interested in the lock + lockpending.erase(subscr); + } + + + + // Point event + // Remember any make-runnable, aka wakeup, event by target pid, for drawing arc + if (IsAWakeup(event)) { + WaitBeforeWakeup(event, cpustate, perpidstate); + DoWakeup(event, cpustate, perpidstate); + WaitAfterWakeup(event, cpustate, perpidstate); + } + + return; + } // End point event + + OneSpan oldspan = thiscpu->cur_span; + // +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----------+ + // | ts | /// | cpu | pid | rpc |event| arg | ret | /// | name | + // +-----+-----+-----+-----+-----+-----+-----+-----+-----+-----------+ + + // Prior span stops here --------^^^^^^^^ + if (thiscpu->valid_span) { + FinishSpan(event, &thiscpu->cur_span); + // Suppress idle 
spans of length zero or exactly 10ns + bool suppress = ((thiscpu->cur_span.duration <= 1) && IsAnIdlenum(thiscpu->cur_span.eventnum)); + if (!suppress) {WriteSpanJson(stdout, thiscpu);} // Previous span + } + + // Connect wakeup event to new span if the PID matches + if (pendingWakeup.find(event.pid) != pendingWakeup.end()) { + // We are at an event w/pid for which there is a pending wakeup, make-runnable + // Make a wakeup arc + OneSpan temp_span = thiscpu->cur_span; // Save + MakeArcSpan(pendingWakeup[event.pid], event, &thiscpu->cur_span); + WriteSpanJson(stdout, thiscpu); // Standalone arc span + // Consume the pending wakeup + pendingWakeup.erase(event.pid); + thiscpu->cur_span = temp_span; // Restore + } + + // Make a wait_cpu display span from the wakeup to here + if (priorPidEnd.find(event.pid) != priorPidEnd.end()) { + // We have been waiting for a CPU to become available and it did. + OneSpan temp_span = thiscpu->cur_span; // Save + MakeWaitSpan('c', priorPidEnd[event.pid], event.start_ts, event.pid, 0, &thiscpu->cur_span); + + ////// Consume the pending wait + ////priorPidEnd.erase(event.pid); + priorPidEnd[event.pid] = event.start_ts + event.duration; + // Don't clutter if the waiting is short (say < 10 usec) + if (thiscpu->cur_span.duration >= kMIN_WAIT_DURATION) { + WriteSpanJson(stdout, thiscpu); // Standalone wait_cpu span + } + thiscpu->cur_span = temp_span; // Restore + } + + // Don't start new span quite yet. + // If we have a return from foo and foo is on the stack, all is good. 
+ // But if we have a return from foo and there is no foo on the stack, we don't + // want to overwrite whatever is there until we push a fake foo + + // Optimized calls are both call/return and are treated here as call + if (IsACall(event)) { + StartSpan(event, &thiscpu->cur_span); // Call span start --------vvvvvvvv + thiscpu->valid_span = true; + + if (IsOptimizedCall(event)) { + AdjustStackForPush(event, thiscpu); // Die if any -- preproc failed + // Emit the call span now but don't push + thiscpu->cur_span.duration = event.duration; + // Note: Optimized call/ret, prior span ipc in ipc<3:0>, current span in ipc<7:4> + thiscpu->cur_span.ipc = (event.ipc >> 4) & ipc_mask; + WriteSpanJson(stdout, thiscpu); // Standalone call-return span + // Continue what we were doing, with new start_ts + thiscpu->cur_span = oldspan; + thiscpu->cur_span.start_ts = event.start_ts + event.duration; + } else { + // Non-optimized call + // Push newly-pending call for later matching return + AdjustStackForPush(event, thiscpu); // Die if any -- preproc failed + ++thiscpu->cpu_stack.top; + thiscpu->cpu_stack.eventnum[thiscpu->cpu_stack.top] = event.eventnum; + thiscpu->cpu_stack.name[thiscpu->cpu_stack.top] = event.name; + } + + } else if (IsAReturn(event)) { + // Called span we are returning from got closed above. Start just after return. + // Adjust first, then start span at proper nesting level + AdjustStackForPop(event, thiscpu); // Die if any -- preproc failed + --thiscpu->cpu_stack.top; + StartSpan(event, &thiscpu->cur_span); // Post-return span --------vvvvvvvv + thiscpu->valid_span = true; + // If ambiguous, this defaults to the top of stack, e.g. 
inside a syscall + // When we finish this span, we will try to resolve the ambiguity and possibly change + // eventnum and name to the usermode PID at cpu_stack[0] + thiscpu->cur_span.eventnum = thiscpu->cpu_stack.eventnum[thiscpu->cpu_stack.top]; + thiscpu->cur_span.name = thiscpu->cpu_stack.name[thiscpu->cpu_stack.top]; + + } else if (IsUserExec(event)) { // context switch +#if 1 + StartSpan(event, &thiscpu->cur_span); // Post-switch span --------vvvvvvvv + thiscpu->valid_span = true; +#endif + + } else { + // c-exit and other synthesized items + // Make it a standalone span and go back to what was running + WriteEventJson(stdout, &event); + // Continue what we were doing, with new start_ts + StartSpan(event, &thiscpu->cur_span); // New start --------vvvvvvvv + thiscpu->valid_span = true; + thiscpu->cur_span = oldspan; + thiscpu->cur_span.start_ts = event.start_ts + event.duration; + } +} // End ProcessEvent + + +// Process one inserted event +void InsertEvent(const OneSpan& event, + CPUState* cpustate, + PerPidState* perpidstate) { + if (verbose) { + DumpEvent(stdout, "insert:", event); + } + ProcessEvent(event, cpustate, perpidstate); +} + + + +int CallToRet(int eventnum) {return eventnum | ret_mask;} +int RetToCall(int eventnum) {return eventnum & ~ret_mask;} + +string CallnameToRetname(string name) {return "/" + name;} // Add '/' +string RetnameToCallname(string name) {return name.substr(1);} // Remove '/' + +// Insert a dummy return at ts from TOS +void InsertReturnAt(uint64 ts, + const OneSpan& event, + CPUState* cpustate, + PerPidState* perpidstate) { + CPUState* thiscpu = &cpustate[event.cpu]; + PidState* thiscpu_stack = &thiscpu->cpu_stack; + + OneSpan newevent = event; + newevent.start_ts = ts; + newevent.duration = 0; + //newevent.cpu = xx; + //newevent.pid = xx; + //newevent.rpcid = xx; + newevent.eventnum = CallToRet(thiscpu_stack->eventnum[thiscpu_stack->top]); + newevent.arg = 0; + newevent.retval = 0; + //newevent.ipc = 0; + newevent.name = 
CallnameToRetname(thiscpu_stack->name[thiscpu_stack->top]); + InsertEvent(newevent, cpustate, perpidstate); +} + +// Insert a dummy call at ts to event (which is a return) +void InsertCallAt(uint64 ts, + const OneSpan& event, + CPUState* cpustate, + PerPidState* perpidstate) { + OneSpan newevent = event; + newevent.start_ts = ts; + newevent.duration = 0; + //newevent.cpu = xx; + //newevent.pid = xx; + //newevent.rpcid = xx; + newevent.eventnum = RetToCall(event.eventnum); + newevent.arg = 0; + newevent.retval = 0; + //newevent.ipc = 0; + newevent.name = RetnameToCallname(event.name); + InsertEvent(newevent, cpustate, perpidstate); +} + +// Insert a dummy call/return at ts to event (which is a call) +void InsertCallRetAt(uint64 ts, + const OneSpan& event, + CPUState* cpustate, + PerPidState* perpidstate) { + OneSpan newevent = event; + newevent.start_ts = ts; + //newevent.duration = xx; + //newevent.cpu = xx; + //newevent.pid = xx; + //newevent.rpcid = xx; + //newevent.eventnum = xx; + //newevent.arg = xx; + //newevent.retval = xx; + //newevent.ipc = xx; + //newevent.name = xx; + InsertEvent(newevent, cpustate, perpidstate); +} + +// Return from X +// if TOS = call to X, all is good +// if X is on the stack, pop to it +// if TOS = call to something that nests inside X, pop and repeat +// if TOS = Y and X is higher on the stack +// Fixup: If this event is a return from X and X has a smaller nesting level than top of stack, +// insert extra returns and pop stack +// Return true if the event is to be used, and false if it is to be discarded +bool FixupReturn(uint64 new_start_ts, + const OneSpan& event, + CPUState* cpustate, + PerPidState* perpidstate) { + CPUState* thiscpu = &cpustate[event.cpu]; + PidState* thiscpu_stack = &thiscpu->cpu_stack; + int matching_callnum = RetToCall(event.eventnum); + + // if TOS = call to X, all is good + if (thiscpu_stack->eventnum[thiscpu_stack->top] == matching_callnum) {return true;} + + // If TOS = reschedule_ipi and this = /BH:hi, 
let it match + if ((thiscpu_stack->name[thiscpu_stack->top] == "reschedule_ipi") && + (event.name == "/BH:hi")) {return true;} + + bool callfound = false; + for (int i = 1; i <= thiscpu_stack->top; ++i) { + if (thiscpu_stack->eventnum[i] == matching_callnum) {callfound = true;} + } + + // if X is on the stack, pop to it + OneSpan newevent; + if (callfound) { + // Insert dummy returns at now until TOS = X (we don't know the retval) + while (thiscpu_stack->eventnum[thiscpu_stack->top] != matching_callnum) { + // Return now from TOS +if(verbose){fprintf(stdout, "InsertReturnAt 1\n");} + InsertReturnAt(event.start_ts, event, cpustate, perpidstate); + } + return true; + } + + // At start of current span, insert dummy returns until nesting X is OK, then a dummy call to X + + // Insert dummy returns at new_start_ts until nesting X is OK (we don't know the retval) + while (NestLevel(matching_callnum) <= NestLevel(thiscpu_stack->eventnum[thiscpu_stack->top])) { + // Return at span_start_ts from TOS +if(verbose){fprintf(stdout, "InsertReturnAt 2\n");} + InsertReturnAt(new_start_ts, event, cpustate, perpidstate); + } + + // Insert dummy call at span_start_ts to X (we don't know the arg value) + InsertCallAt(new_start_ts, event, cpustate, perpidstate); + return true; +} + +bool FixupCall(uint64 new_start_ts, + const OneSpan& event, + CPUState* cpustate, + PerPidState* perpidstate) { + CPUState* thiscpu = &cpustate[event.cpu]; + PidState* thiscpu_stack = &thiscpu->cpu_stack; + int matching_callnum = RetToCall(event.eventnum); + + // Insert dummy returns at new_start_ts until nesting X is OK (we don't know the retval) + while (NestLevel(matching_callnum) <= NestLevel(thiscpu_stack->eventnum[thiscpu_stack->top])) { + // Return at span_start_ts from TOS +if(verbose){fprintf(stdout, "InsertReturnAt 3: %d %d\n", matching_callnum, thiscpu_stack->eventnum[thiscpu_stack->top]);} + InsertReturnAt(new_start_ts, event, cpustate, perpidstate); + } + return true; +} + +bool 
FixupResched(uint64 ts, + const OneSpan& event, + CPUState* cpustate, + PerPidState* perpidstate) { + CPUState* thiscpu = &cpustate[event.cpu]; + PidState* thiscpu_stack = &thiscpu->cpu_stack; + if (thiscpu_stack->name[thiscpu_stack->top] == "reschedule_ipi") { + --thiscpu_stack->top; + } + return true; +} + + +// We are exactly in sched but missing its return +bool FixupSched(uint64 new_start_ts, + const OneSpan& event, + CPUState* cpustate, + PerPidState* perpidstate) { + CPUState* thiscpu = &cpustate[event.cpu]; + PidState* thiscpu_stack = &thiscpu->cpu_stack; + int matching_callnum = RetToCall(event.eventnum); + + // Return at new_start_ts from TOS +if(verbose){fprintf(stdout, "InsertReturnAt 4\n");} + InsertReturnAt(new_start_ts, event, cpustate, perpidstate); + ////--thiscpu_stack->top; + return true; +} + + +// Fixup: Turn idle/mwait/idle/X into idle/mwait/idle/c-exit/X +// We are at X +// idle is on the stack[0] as PID=0 and as current span +bool FixupCexit(uint64 new_start_ts, + const OneSpan& event, + CPUState* cpustate, + PerPidState* perpidstate) { + CPUState* thiscpu = &cpustate[event.cpu]; + PidState* thiscpu_stack = &thiscpu->cpu_stack; + + // Table entries are unknown units; they appear to be multiples of 100ns + uint64 exit_latency = kLatencyTable[thiscpu->mwait_pending] * 10; + + uint64 pending_span_latency = new_start_ts - thiscpu->cur_span.start_ts; + + bool good_mwait = (thiscpu->cpu_stack.top == 0); // Expecting to be in user-mode + if (!good_mwait) { + // No change -- we are not immediately after a switch to idle + fprintf(stderr, "FixupCexit ignored %llu %llu %llu %d %05x\n", + new_start_ts, exit_latency, pending_span_latency, + thiscpu->cpu_stack.top, thiscpu->cpu_stack.eventnum[0]); + return true; + } + + // Calculate exit_latency = min(exit_latency, pending_span_latency) + if (pending_span_latency < exit_latency) { + // Actual remaining idle is shorter than supposed exit latency. 
+ exit_latency = pending_span_latency; + } + // If too short, don't bother with the c-exit + if (exit_latency < kMIN_CEXIT_DURATION) {return true;} + + // Inserting the c-exit will shorten the pending idle + ////thiscpu->cur_span.duration -= exit_latency; +////fprintf(stdout, "~~duration[%d] -= %llu = %llu\n", event.cpu, exit_latency, thiscpu->cur_span.duration); + + // Insert c-exit call/ret + uint64 cexit_start_ts = new_start_ts - exit_latency; + OneSpan newevent = event; + newevent.start_ts = cexit_start_ts; + newevent.duration = exit_latency; + //newevent.cpu = xx; + //newevent.pid = xx; + //newevent.rpcid = xx; + newevent.eventnum = event_c_exit; // Treated as a call/ret + newevent.arg = 0; + newevent.retval = 0; + newevent.ipc = 0; + newevent.name = "-c-exit-"; + // Inserting the c-exit shortens the pending low-power idle + InsertEvent(newevent, cpustate, perpidstate); + + // After the c-exit, we are no longer low power + thiscpu->cur_span.arg = 0; // Mark continuing idle as normal power + thiscpu->cur_span.name = kIdleName; + + return true; +} + + +// Insert a make-runnable event at clone, fork, etc. +bool FixupRunnable(uint64 new_start_ts, + const OneSpan& event, + CPUState* cpustate, + PerPidState* perpidstate) { + // We can be called with standalone call event, optimized call/ret, or ret + // If standalone call, we don't know the end time yet, so just return. + if (IsACall(event) && !IsOptimizedCall(event)) {return true;} + + // Insert runnable + OneSpan newevent = event; + newevent.start_ts = new_start_ts; + newevent.duration = 1; + //newevent.cpu = xx; + //newevent.pid = xx; + //newevent.rpcid = xx; + newevent.eventnum = KUTRACE_RUNNABLE; + newevent.arg = event.retval; // The target of clone/fork/etc. 
+ newevent.retval = 0; + newevent.ipc = 0; + newevent.name = "runnable"; + InsertEvent(newevent, cpustate, perpidstate); + return true; +} + +// Insert an RPC msg event, describing msg span of packets on the network + +bool EmitRxTxMsg(const PidCorr& corr, CPUState* cpustate, PerPidState* perpidstate) { +//fprintf(stderr, "EmitRxTxMsg ts/rpcid/lglen8/rx = %llu %u %u %u\n", corr.k_timestamp, corr.rpcid, corr.lglen8, corr.rx); + uint64 k_timestamp; // Time kernel code saw hash32. 0 means not known yet + uint32 rpcid; // 0 means not known yet + uint16 lglen8; // 0 means not known yet + bool rx; // true if rx + + if ((corr.k_timestamp == 0) || (corr.rpcid == 0) || (corr.lglen8 == 0)) { + return true; + } + char msg_name[64]; + sprintf(msg_name, "%s.%d", methodnames[corr.rpcid].c_str(), corr.rpcid); + uint64 msg_len = TenPow(corr.lglen8); + uint64 dur = msg_dur_10nsec(msg_len); // Increments of 10ns + uint64 msg_event = corr.rx ? KUTRACE_RPCIDRXMSG : KUTRACE_RPCIDTXMSG; + + // Insert RpcMsg + OneSpan newevent; + // Subtracting duration shows incoming packets ending at kernel timestamp + // Outgoing packets start at kernel timestamp + newevent.start_ts = corr.k_timestamp - (corr.rx ? dur : 0LLU); + newevent.duration = dur; + newevent.cpu = 0; + newevent.pid = 0; + newevent.rpcid = corr.rpcid; + newevent.eventnum = msg_event; + newevent.arg = msg_len; + newevent.retval = 0; + newevent.ipc = 0; + newevent.name = string(msg_name); +//DumpEvent(stderr, "EmitRxTxMsg:", newevent); + InsertEvent(newevent, cpustate, perpidstate); + return true; +} + +// If the length is nearly 0, this is from an old client4/server4 that did not include +// the signature/header size. 
+// Set it here to TenLg(16 + 72) = lg(88) * 10 = 6.46, so 64 +uint64 FixupLength(uint64 lglen8) { + return uint64max(64, lglen8); +} + + +//---------------------------------------------------------------------------// +// Preprocess cleans up the input events: +// - Insert any missing calls/ returns +// - Insert any missing make-runnable +// - Insert mwait sine waves +// - Insert any missing names +// - Insert wait_* events +// - Insert wakeup-to-exec arcs +// +void PreProcessEvent(const OneSpan& event, + CPUState* cpustate, + PerPidState* perpidstate) { + CPUState* thiscpu = &cpustate[event.cpu]; + PidState* thiscpu_stack = &thiscpu->cpu_stack; + + // Fixups may say to delete this event + bool keep = true; + + // The start_ts to insert fixups can be the current time, event.start_ts, + // or the start of the current span, thiscpu->cur_span.start_ts, but only + // if the current span is valid + uint64 span_start_time = thiscpu->cur_span.start_ts; + if (!thiscpu->valid_span) {span_start_time = event.start_ts;} + + //--------------------------------------------// + // Things to insert BEFORE the current event // + //--------------------------------------------// + + // - Insert any missing calls/ returns + // Fixup: return from X + // if TOS = call to X, all is good, else pop as needed + // If X is not on the stack, insert call to it at span_start_ts + if (IsAReturn(event)) { + keep &= FixupReturn(span_start_time, event, cpustate, perpidstate); + } + + // Fixup: a call to -sched- with reschedule_ipi on the stack silently pops it off + if (IsSchedCallEvent(event)) { + keep &= FixupResched(span_start_time, event, cpustate, perpidstate); + } + + // Fixup: a syscall/irq/fault INSIDE -sched-; we missed the return from sched + // insert a return from sched at context switch if any, + // putting us back at top-level user-mode + if ((thiscpu->ctx_switch_ts > 0) && IsACall(event) && (thiscpu_stack->top == 1) && + 
(IsSchedCallEventnum(thiscpu_stack->eventnum[thiscpu_stack->top]))) { + keep &= FixupSched(thiscpu->ctx_switch_ts, event, cpustate, perpidstate); + } + + // Fixup: call to X + // if TOS = call to something that must nest inside X, pop and repeat + if (IsACall(event)) { + keep &= FixupCall(event.start_ts, event, cpustate, perpidstate); + } + + // Insert mwait sine waves + // The earlier mwait already changed idle/mwait/X to idle/mwait/idle/X + // We are at X + // Fixup: Turn idle/mwait/X into idle'/mwait/c-exit/X + // events -- shorter idle followed by power C-state exit latency + if (cpustate[event.cpu].mwait_pending > 0) { + // We are at X + keep &= FixupCexit(event.start_ts, event, cpustate, perpidstate); + cpustate[event.cpu].mwait_pending = 0; +//fprintf(stdout, "~~mwait_pending[%d] = %d\n", event.cpu, 0); + } + + // - Insert any missing names + // - Insert wakeup-to-exec arc events + + // + // Remember bits of state + // + + // Remember last instance of each PID, for xxx + // We want to do this for the events that finish execution spans + if ((event.pid > 0) && (event.cpu >= 0)) { + priorPidEvent[event.pid] = event; +//fprintf(stdout, "~~priorPidEvent[%d] = %llu\n", event.pid, event.start_ts); + } + + // Remember that there is no pending context switch, for FixupSched + if (IsSchedCallEvent(event) || IsSchedReturnEvent(event)) { + thiscpu->ctx_switch_ts = 0; +//fprintf(stdout, "~~ctx_switch_ts[%d] = 0\n", event.cpu); + } + + if (IsAContextSwitch(event)) { // for FixupSched + // Remember this pending context switch time, in case /sched is missing + thiscpu->ctx_switch_ts = event.start_ts; +//fprintf(stdout, "~~ctx_switch_ts[%d] = %llu\n", event.cpu, event.start_ts); + } + + // Remember any mwait by cpu, for drawing c-state exit sine wave + if (IsAnMwait(event)) { // For FixupCexit + thiscpu->mwait_pending = event.arg; +//fprintf(stdout, "~~mwait_pending[%d] = %d\n", event.cpu, event.arg); + } + + // Remember any failed lock acquire event, for wait_lock + if 
(event.eventnum == KUTRACE_LOCKNOACQUIRE) { + pendingLock[event.arg] = event; + priorPidLock[event.pid] = event.arg; +//fprintf(stdout, "~~priorPidLock[%d] = %d\n", event.pid, event.arg); + } + + // Enqueue/dequeue processing: make a queue span per RPC + if (IsAnEnqueue(event)) { + // Remember which queue the RPC is put on but wait until the + // upcoming RPCIDREQ/RESP to use that timestamp + // queue_num is in event.arg + thiscpu->cpu_stack.enqueue_num_pending = event.arg; + } + + if (IsADequeue(event)) { + // Remember which queue the RPC is removed from but wait until the + // upcoming RPCIDREQ/RESP to use that timestamp + thiscpu->cpu_stack.dequeue_num_pending = event.arg; + } + + // Go ahead and timestamp enq/deq either at RPC change or context switch + if (IsAnRpc(event) || IsAContextSwitch(event)) { + if (0 <= thiscpu->cpu_stack.enqueue_num_pending) { + // Switching away from an RPC. Remember that queued span starts here + // Old rpcid is in event.rpcid + enqueuetime[event.rpcid] = event.start_ts + 1; // Start used below + thiscpu->cpu_stack.enqueue_num_pending = -1; + } + + if (0 <= thiscpu->cpu_stack.dequeue_num_pending) { + // Switching to new RPC. 
Emit a queued span ending here + // New rpcid is in event.arg + OneSpan temp_span; + MakeQueuedSpan(enqueuetime[event.arg], event.start_ts - 1, + thiscpu->cpu_stack.dequeue_num_pending, event.arg, &temp_span); + thiscpu->cpu_stack.dequeue_num_pending = -1; + // Don't clutter if the queued waiting is short (say < 10 usec) + if (temp_span.duration >= kMIN_WAIT_DURATION) { + WriteSpanJson2(stdout, &temp_span); // Standalone queued span + } + } + } + + +// +// Begin RPC packet correlation +// +// NOTE: Must do incoming test before outgoing work +// +// Incoming event order +// RX_PKT: remember kernal timestamp in rx_hashtocorr[hash32] +// RX_USER: find k_ts in rx_hashtocorr[hash32], remember k_ts in pidtocorr[pid], +// erase rx_hashtocorr[hash32] +// RPCIDRE*: have rpcid/length, find k_ts in pidtocorr[pid]; put out (rpcid/name/length/k_ts) +// pidtocorr[pid] + uint32 pkt_hash32 = (uint32)event.arg; + + if (IsRawRxPktInt(event.eventnum)) { +//DumpEvent(stderr, "IsRawRxPktInt:", event); + rx_hashtocorr[pkt_hash32] = inithashcorr; + rx_hashtocorr[pkt_hash32].k_timestamp = event.start_ts; + } + + if (IsUserRxPktInt(event.eventnum)) { +//DumpEvent(stderr, "IsUserRxPktInt:", event); + pidtocorr[event.pid] = initpidcorr; + if (rx_hashtocorr.find(pkt_hash32) != rx_hashtocorr.end()) { + pidtocorr[event.pid].k_timestamp = rx_hashtocorr[pkt_hash32].k_timestamp; + } + rx_hashtocorr.erase(pkt_hash32); + pidtocorr[event.pid].rx = true; + } + + if (IsIncomingRpcReqResp(event)) { +//DumpEvent(stderr, "IsIncomingRpcReqResp:", event); + uint32 msg_rpcid16 = event.arg & 0xffff; + uint16 msg_lglen8 = FixupLength((event.arg >> 16) & 0xff); + pidtocorr[event.pid].rpcid = msg_rpcid16; + pidtocorr[event.pid].lglen8 = msg_lglen8; + keep &= EmitRxTxMsg(pidtocorr[event.pid], cpustate, perpidstate); + pidtocorr.erase(event.pid); + } + +// Outgoing event order +// RPCIDRE*: remember rpcid/length in pidtocorr[pid] +// TX_USER: remember pid in tx_hashtocorr[hash32] +// TX_PKT: have kernel timestamp, 
have pid in tx_hashtocorr[hash32], rpcid/length in pidtocorr[pid]; +// erase rx_hashtocorr[hash32] +// put out (rpcid/name/length/k_ts) +// erase pidtocorr[pid] + if (IsOutgoingRpcReqResp(event)) { + // This creates a pidtocorr record. If the test for IsIncomingRpcReqResp + // follows this, it will erroneously return true. So we do the + // incoming correlation first, above. +//DumpEvent(stderr, "IsOutgoingRpcReqResp:", event); + uint32 msg_rpcid16 = event.arg & 0xffff; + uint16 msg_lglen8 = FixupLength((event.arg >> 16) & 0xff); + pidtocorr[event.pid] = initpidcorr; + pidtocorr[event.pid].rpcid = msg_rpcid16; + pidtocorr[event.pid].lglen8 = msg_lglen8; + pidtocorr[event.pid].rx = false; + } + + if (IsUserTxPktInt(event.eventnum)) { +//DumpEvent(stderr, "IsUserTxPktInt:", event); + tx_hashtocorr[pkt_hash32] = inithashcorr; + tx_hashtocorr[pkt_hash32].pid = event.pid; + } + + if (IsRawTxPktInt(event.eventnum)) { +//DumpEvent(stderr, "IsRawTxPktInt:", event); + uint32 pid = 0; + if (tx_hashtocorr.find(pkt_hash32) != tx_hashtocorr.end()) { + pid = tx_hashtocorr[pkt_hash32].pid; + } + tx_hashtocorr.erase(pkt_hash32); + if (pidtocorr.find(pid) != pidtocorr.end()) { + pidtocorr[pid].k_timestamp = event.start_ts; + keep &= EmitRxTxMsg(pidtocorr[pid], cpustate, perpidstate); + } + pidtocorr.erase(pid); + } +// End RPC packet correlation + + + if (event.eventnum == KUTRACE_MBIT_SEC) { + mbit_sec = event.arg; + keep = false; // Not a JSON event -- moved to JSON metadata + } + + //--------------------------------------------// + // The current event // + //--------------------------------------------// + + if (keep) { + ProcessEvent(event, cpustate, perpidstate); + } + + //--------------------------------------------// + // Things to insert AFTER the current event // + //--------------------------------------------// + + // - Insert any missing make-runnable + // Do this AFTER the syscall/ret has been processed (keep is ignored) + if (IsNewRunnablePidSyscall(event) && 
(event.retval != 0)) { + keep &= FixupRunnable(event.start_ts + event.duration, event, cpustate, perpidstate); + } + +} // End PreProcessEvent + +// +//---------------------------------------------------------------------------// + + + +// Fix PID names +// We do four things here: +// (1) For each PID number, keep its current name, in time order +// (2) For each PID number, keep a string of all the names assigned to it, in time order +// first+second+third... for naming a row of the timeline display +// (3) Force idle PID name to be consistent +// (4) Update the name in each user-mode span + +// We will see multiple names for a process ID after execve +// If we see a, then b, then c for the same PID, form a+b+c + +// Record the current name for each process ID, PID +// If not the dummy names at time -1, also record any accumulated varying names for the same PID +void RecordPidName(int64 temp_ts, int temp_arg, char* temp_name, CPUState* cpustatep) { + // temp_arg might be an event number, pid + 0x10000. 
If so, covert it to a PID number + temp_arg = EventnumToPid(temp_arg); + if (temp_arg == pid_idle) {return;} // Never update idle name + + string temp_name_str = string(temp_name); + // Turn "./kutrace_contro" into "kutrace_control" + if (strcmp(temp_name, "./kutrace_contro") == 0) { + temp_name_str = string("kutrace_control"); + } + + // Remove any leading ./ pathname + if (memcmp(temp_name, "./", 2) == 0) { + temp_name_str = string(temp_name + 2); + } + + // Record current name for this pid + pidnames[temp_arg] = temp_name_str; // Current name for this PID +////fprintf(stderr, "pidname[%5d] = %s %lld\n", temp_arg, pidnames[temp_arg].c_str(), temp_ts); + + if (temp_ts == -1) {return;} + + // Record accumulated row name(s) for a PID + if (pidrownames[temp_arg].find(temp_name_str) == string::npos) { + // Add this pid to row name + if (pidrownames[temp_arg].empty()) { + pidrownames[temp_arg] = temp_name_str; + } else { + pidrownames[temp_arg] = pidrownames[temp_arg] + "+" + temp_name_str; +////fprintf(stderr, "rowname[%5d] = %s\n", temp_arg, pidrownames[temp_arg].c_str()); + } + } + + // Update this name on any pending CPU stack + for (int cpu = 0; cpu <= max_cpu_seen; ++cpu) { + if(cpustatep[cpu].cpu_stack.eventnum[0] == PidToEventnum(temp_arg)) { + cpustatep[cpu].cpu_stack.name[0] = NameAppendPid(temp_name_str, temp_arg); + } + } +} + +void FixPidName(OneSpan* eventp) { + if (!IsUserExec(*eventp) && !(IsAContextSwitch(*eventp))) {return;} + + // Force in the current name from pidnames[pid] + int pid = EventnumToPid(eventp->eventnum); + if (pidnames.find(pid) != pidnames.end()) { + eventp->name = NameAppendPid(pidnames[pid], pid); + // Also update the stacked name for this pid + // also update the span name for this pid + //stack->name[0] + } +} + +// For Raspberry PI, change mwait to wfi +void FixMwaitName(OneSpan* eventp) { + if (is_rpi && IsAnMwait(*eventp)) { + eventp->name = string("wfi"); + } + +} + +// This covers many naming sins +void FixNames(OneSpan* 
// Read next line, stripping any crlf. Return false if no more.
//   f        stream to read from
//   buffer   receives the NUL-terminated line, at most maxsize - 1 characters
//   maxsize  size of buffer in bytes
// One trailing "\n" and then one trailing "\r" are removed, so "\r\n", "\n",
// and "\r" endings are all handled. A blank line yields an empty string.
bool ReadLine(FILE* f, char* buffer, int maxsize) {
  char* s = fgets(buffer, maxsize, f);
  if (s == NULL) {return false;}
  size_t len = strlen(s);
  // Strip any crlf or cr or lf.
  // len must be tested before each look at s[len - 1]: it is 0 for a line that
  // starts with a NUL byte, and becomes 0 once the "\n" of a blank line is
  // stripped. Without the test we would read (and possibly write) buffer[-1].
  if ((len > 0) && (s[len - 1] == '\n')) {s[--len] = '\0';}
  if ((len > 0) && (s[len - 1] == '\r')) {s[--len] = '\0';}
  return true;
}
+ +// +// Usage: eventtospan3 [-v] [-t] +// +int main (int argc, const char** argv) { + CPUState cpustate[kMAX_CPUS]; // Running state for each CPU + PerPidState perpidstate; // Saved PID call stacks, for context switching + + OneSpan event; + string trace_label; + string trace_timeofday; + kernel_version.clear(); + cpu_model_name.clear(); + host_name.clear(); + methodnames.clear(); + pidtocorr.clear(); + rx_hashtocorr.clear(); + tx_hashtocorr.clear(); + + + if (argc >= 2) { + // Pick off trace label from first argument, if any + trace_label = string(argv[1]); + } + + // Pick off other flags + for (int i = 1; i < argc; ++i) { + if (strcmp(argv[i], "-v") == 0) {verbose = true;} + if (strcmp(argv[i], "-t") == 0) {trace = true;} + if (strcmp(argv[i], "-rel0") == 0) {rel0 = true;} + } + + // Initialize CPU state + for (int i = 0; i < kMAX_CPUS; ++i) { + InitPidState(&cpustate[i].cpu_stack); + InitSpan(&cpustate[i].cur_span, i); + //InitSpan(&cpustate[i].prior_pc_sample, i); + cpustate[i].prior_pstate_ts = 0; + cpustate[i].prior_pstate_freq = 0; + cpustate[i].prior_pc_samp_ts = 0; + cpustate[i].ctx_switch_ts = 0; + cpustate[i].mwait_pending = 0; + cpustate[i].oldpid = 0; + cpustate[i].newpid = 0; + cpustate[i].valid_span = false; // Ignore initial span + } + + // Set idle name + pidnames[pid_idle] = string(kIdleName); + pidrownames[pid_idle] = string(kIdleName); + + // PID 0, the idle task, is special. Multiple copies can be running on different CPUs, and + // It can be in the midst of an interrupt when a context switch goes to another thread, + // but the interrupt code is silently done. + // Here we set the stacked idle task as inside sched, and we never change that elsewhere. 
+ BrandNewPid(pid_idle, string(kIdleName), &perpidstate); + + + // + // Main loop + // + uint64 lowest_ts = 0; + uint64 prior_ts = 0; + int linenum = 0; + char buffer[kMaxBufferSize]; + while (ReadLine(stdin, buffer, kMaxBufferSize)) { + ++linenum; + int len = strlen(buffer); + if (buffer[0] == '\0') {continue;} + + // Comments start with #, some are stylized and contain data + if (buffer[0] == '#') { + // Pull timestamp out of early comments + // Look for first + // # [1] 2017-08-21_09:51:48.620665 + // Must be there. This triggers initial json output + if ((len >= 32) && + trace_timeofday.empty() && + (memcmp(buffer, "# [1] 20", 8) == 0)) { + // From # [1] 2019-03-16_16:43:42.571604 + // extract 2019-03-16_16:43:00 + // since the timestamps are all relative to a minute boundary + trace_timeofday = string(buffer, 6, 17) + "00"; + //fprintf(stderr, "eventtospan3: trace_timeofday '%s'\n", trace_timeofday.c_str()); + InitialJson(stdout, trace_label.c_str(), trace_timeofday.c_str()); + } + // Pull version and flags out if present + if (memcmp(buffer, "# ## VERSION: ", 14) == 0) { + incoming_version = atoi(buffer + 14); + //fprintf(stderr, "VERSION %d\n", incoming_version); + } + if (memcmp(buffer, "# ## FLAGS: ", 12) == 0) { + incoming_flags = atoi(buffer + 12); + //fprintf(stderr, "FLAGS %d\n", incoming_flags); + } + continue; + } + + // Input created by: + // fprintf(stdout, "%lld %lld %lld %lld %lld %lld %lld %lld %d %s (%llx)\n", + // mhz, duration, event, current_cpu, current_pid[current_cpu], current_rpc[current_cpu], + // arg, retval, ipc, name.c_str(), event); + // or if a name by + // fprintf(stdout, "%lld %lld %lld %lld %s\n", + // mhz, duration, event, nameinsert, tempstring); + // + + // Trace flag prints each incoming line and the resulting stack and span, + // all on one line + if (trace) {fprintf(stderr, "\n%s", buffer);} + + char name_buffer[256]; + // Pick off the event to see if it is a name definition line + // (This could be done with less repeated 
effort) + int64 temp_ts; + uint64 temp_dur; + int temp_eventnum = 0; + int temp_arg = 0; + char temp_name[64]; + sscanf(buffer, "%lld %llu %d %d %[ -~]", &temp_ts, &temp_dur, &temp_eventnum, &temp_arg, temp_name); + if (IsNamedef(temp_eventnum)) { +//fprintf(stdout, "====%%%s\n", buffer); + if (IsLockNameInt(temp_eventnum)) { // Lock names + locknames[temp_arg] = string(temp_name); + } else if (IsKernelVerInt(temp_eventnum)) { + kernel_version = string(temp_name); + if (temp_ts == -1) {fprintf(stderr, "kernel_version = %s\n", temp_name);} + } else if (IsModelNameInt(temp_eventnum)) { + // If the model is Raspberry, set pstate_is_all_cpus + if (strstr(temp_name, "Raspberry") != NULL) { + is_rpi = true; + } + cpu_model_name = string(temp_name); + if (temp_ts == -1) {fprintf(stderr, "cpu_model_name = %s\n", temp_name);} + } else if (IsHostNameInt(temp_eventnum)) { + host_name = string(temp_name); + if (temp_ts == -1) {fprintf(stderr, "host_name = %s\n", temp_name);} + ////} else if (IsUserExecNonidlenum(temp_arg)) { // Just pick off PID names, accumulating if multiple ones + } else if (IsPidNameInt(temp_eventnum)) { // Just pick off PID names, accumulating if multiple ones + RecordPidName(temp_ts, temp_arg, temp_name, cpustate); + // Update any active stack if name just changed + // Update any current span if name just changed + } else if (IsMethodNameInt(temp_eventnum)) { + // Step (0) of RPC-to-packet correlation + int rpcid = temp_arg & 0xffff; + methodnames[rpcid] = string(temp_name); + } else if (IsQueueNameInt(temp_eventnum)) { + queuenames[temp_arg] = string(temp_name); // Queue number is a small integer + } + // Ignore the rest of the names -- already handled by rawtoevent and sort + continue; + } + + // Read the full non-name event + if (incoming_version < 2) { + int n = sscanf(buffer, "%llu %llu %d %d %d %d %d %d %s", + &event.start_ts, &event.duration, &event.eventnum, &event.cpu, + &event.pid, &event.rpcid, &event.arg, &event.retval, name_buffer); + 
event.ipc = 0; + if (n != 9) {continue;} + } else { + int n = sscanf(buffer, "%llu %llu %d %d %d %d %d %d %d %s", + &event.start_ts, &event.duration, &event.eventnum, &event.cpu, + &event.pid, &event.rpcid, &event.arg, &event.retval, + &event.ipc, name_buffer); + if (n != 10) {continue;} + } + event.name = string(name_buffer); + + // Fix event.rpcid. rawtoevent does not carry them across context switches + event.rpcid = cpustate[event.cpu].cpu_stack.rpcid; // 2021.02.05 + + // Fixup name of idle thread once and for all + if (IsAnIdle(event)) {event.name = string(kIdleName);} + + // Input must be sorted by timestamp + if (event.start_ts < prior_ts) { + fprintf(stderr, "rawtoevent: Timestamp out of order at line[%d] %s\n", linenum, buffer); + exit(0); + } + +if (verbose) { +fprintf(stdout, "\n%% [%d] %llu %llu %03x(%d)=%d %s ", + event.cpu, event.start_ts, event.duration, + event.eventnum, event.arg, event.retval, event.name.c_str()); +DumpShort(stdout, &cpustate[event.cpu]); +} + + if ((lowest_ts == 0) && (0 < event.start_ts)) { + lowest_ts = event.start_ts; + } + + if (kMAX_CPUS <= event.cpu){ + fprintf(stderr, "FATAL: Too-big CPU number at line[%d] '%s'\n", linenum, buffer); + exit(0); + } + + // Keep track of largest CPU number seen + if (max_cpu_seen < event.cpu) { + max_cpu_seen = event.cpu; + } + + // Fixup names + FixNames(&event); + + // Fixup lock names + if (IsALockOneSpan(event)) { + char maybe_better_name[64]; + sprintf(maybe_better_name, "%s%s", + kSpecialName[event.eventnum & 0x001f], + locknames[event.arg].c_str()); + if (true || strlen(maybe_better_name) > strlen(name_buffer)) { + // Do the replacement +//fprintf(stderr, "LOCK %d %s => %s\n", event.arg, name_buffer, maybe_better_name); + event.name = string(maybe_better_name); + } + } + + // Fixup queue names, adding queue number if missing + if (IsAnEnqueue(event) || IsADequeue(event)) { + if (strchr(name_buffer, '(') == NULL) { + char temp[64]; + sprintf(temp, "%s(%d)", name_buffer, event.arg); + 
event.name = string(temp); + } + } + + // Collect RPC method names from RPC point events. TEMPORARY + // DONE: capture proper RPC *method* names in the RPC library and pass them through + // Just grab the first name, which should be on an rpc request event +/*** + if (IsAnRpc(event)) { + // event.rpcid is always valid for RPC events + if (rpcnames.find(event.rpcid) == rpcnames.end()) { + if (false && event.name.find("rpc") == string::npos) { + fprintf(stderr, "BAD rpcname %s\n", event.name.c_str()); + DumpEvent(stderr, "RPC", event); + } else { + rpcnames[event.rpcid] = event.name; + } + } + } +***/ + + prior_ts = event.start_ts; + + // Now do the real work + PreProcessEvent(event, &cpustate[0], &perpidstate); + + if (trace) { + fprintf(stderr, "\t"); + CPUState* thiscpu = &cpustate[event.cpu]; + DumpStackShort(stderr, &thiscpu->cpu_stack); + } + } + // + // End main loop + // + + // Flush the last frequency spans here + for (int i = 0; i <= max_cpu_seen; ++i) { + if (cpustate[i].prior_pstate_ts != 0) { + uint64 prior_ts = cpustate[i].prior_pstate_ts; + uint64 prior_freq = cpustate[i].prior_pstate_freq; + WriteFreqSpan(prior_ts, event.start_ts, i, prior_freq); + } + } + + // Keep any hardware description. Leading space is required. + fprintf(stdout, " \"mbit_sec\" : %d,\n", mbit_sec); + + // Put out any multi-named PID row names + for (IntName::const_iterator it = pidrownames.begin(); it != pidrownames.end(); ++it) { + int pid = it->first; + string rowname = it->second; + double lowest_sec = lowest_ts / 100000000.0; + if (rowname.find("+") != string::npos) { + fprintf(stdout, "[%12.8f, %10.8f, %d, %d, %d, %d, %d, %d, %d, \"%s.%d\"],\n", + lowest_sec, 0.00000001, 0, pid, 0, KUTRACE_LEFTMARK, 0, 0, 0, rowname.c_str(), pid); + } + } + + FinalJson(stdout); + + // Statistics for main timeline; no decorations, PCsamp, etc. 
+ double total_dur = total_usermode + total_idle + total_kernelmode; + total_dur *= 0.01; // To give percents + fprintf(stderr, + "eventtospan3: %lld spans, %2.0f%% usr, %2.0f%% sys, %2.0f%% idle\n", + span_count, + total_usermode / total_dur, total_kernelmode / total_dur, total_idle / total_dur); + + return 0; +} diff --git a/book-user-code/fancylock2.cc b/book-user-code/fancylock2.cc new file mode 100644 index 000000000000..7aa398a8707f --- /dev/null +++ b/book-user-code/fancylock2.cc @@ -0,0 +1,391 @@ +// fancylock2.cc +// +// This defines a software lock that includes some statistics and some identification +// +// Copyright 2021 Richard L. Sites + +#include +#include +#include + +#include "basetypes.h" +#include "fancylock2.h" + +// +// Fancylock2 (64 bytes, cacheline aligned) +// +// +-------+-------+-------+-------+-------+-------+-------+-------+ +// 0 | lock | waiters | +// +-------+-------+-------+-------+-------+-------+-------+-------+ +// 8 | wait.counts | +// +-------+-------+-------+-------+-------+-------+-------+-------+ +// 16 | wait.counts_hi | +// +-------+-------+-------+-------+-------+-------+-------+-------+ +// 24 | hmin | hmax |expec'd| ///// | holder | +// +-------+-------+-------+-------+-------+-------+-------+-------+ +// 32 | ///////////////////////////// | ///////////////////////////// | | +// +-------+-------+-------+-------+-------+-------+-------+-------+ +// 40 | lnamehash | filename +// +-------+-------+-------+-------+-------+-------+-------+-------+ +// 48 | filename | +// +-------+-------+-------+-------+-------+-------+-------+-------+ +// 56 | filename:line | +// +-------+-------+-------+-------+-------+-------+-------+-------+ +// + +static const int32 kBucketWidthBits[8] = {13, 11, 10, 8, 7, 6, 5, 4}; +static const int32 kBucketStartBit[8] = {0, 13, 24, 34, 42, 49, 55, 60}; +static const uint64 kBucketIncr[8] = + {0x0000000000000001LLU, 0x0000000000002000LLU, + 0x0000000001000000LLU, 0x0000000400000000LLU, + 
0x0000040000000000LLU, 0x0002000000000000LLU, + 0x0080000000000000LLU, 0x1000000000000000LLU}; +static const uint64 kBucketField[8] = + {0x0000000000001FFFLLU, 0x0000000000FFE000LLU, + 0x00000003FF000000LLU, 0x000003FC00000000LLU, + 0x0001FC0000000000LLU, 0x007E000000000000LLU, + 0x0F80000000000000LLU, 0xF000000000000000LLU}; +static const uint64 kBucketHigh[8] = + {0x0000000000001000LLU, 0x0000000000800000LLU, + 0x0000000200000000LLU, 0x0000020000000000LLU, + 0x0001000000000000LLU, 0x0040000000000000LLU, + 0x0800000000000000LLU, 0x8000000000000000LLU}; + +static const uint64 kBucketAllLow = + 0x0000000000000001LLU | 0x0000000000002000LLU | + 0x0000000001000000LLU | 0x0000000400000000LLU | + 0x0000040000000000LLU | 0x0002000000000000LLU | + 0x0080000000000000LLU | 0x1000000000000000LLU; + +// Upper value of each histogram bucket for power-of-ten buckets +static const uint32 kWaitMaxes[8] = {9, 99, 999, 9999, 99999, 999999, 9999999, 0x7FFFFFFF}; + + +// These tables let us map integer values up to about 100M into single bytes and back out +// with better than 10% accuracy. This is good enough for 1-2 digits precision +// of 90th percentile microsecond delay. + +// log10(n) as 3.5 bits rounded, [0..255], e.g. 
log10(255)=2.4065 x32 = 77.008 +static const uint8 CACHEALIGNED kLog10As3dot5[256] = { + 0, 1, 10, 15, 19, 22, 25, 27, 29, 31, 32, 33, 35, 36, 37, 38, + 39, 39, 40, 41, 42, 42, 43, 44, 44, 45, 45, 46, 46, 47, 47, 48, + 48, 49, 49, 49, 50, 50, 51, 51, 51, 52, 52, 52, 53, 53, 53, 54, + 54, 54, 54, 55, 55, 55, 55, 56, 56, 56, 56, 57, 57, 57, 57, 58, + 58, 58, 58, 58, 59, 59, 59, 59, 59, 60, 60, 60, 60, 60, 61, 61, + 61, 61, 61, 61, 62, 62, 62, 62, 62, 62, 63, 63, 63, 63, 63, 63, + 63, 64, 64, 64, 64, 64, 64, 64, 65, 65, 65, 65, 65, 65, 65, 65, + 66, 66, 66, 66, 66, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, + + 67, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 69, 69, 69, 69, 69, + 69, 69, 69, 69, 69, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, + 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 71, 72, 72, 72, 72, + 72, 72, 72, 72, 72, 72, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73, + 73, 73, 73, 73, 73, 73, 73, 74, 74, 74, 74, 74, 74, 74, 74, 74, + 74, 74, 74, 74, 74, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, 75, + 75, 75, 75, 75, 75, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, 76, + 76, 76, 76, 76, 76, 76, 77, 77, 77, 77, 77, 77, 77, 77, 77, 77, +}; + +// pow10(n/32), n in [0..31] +// Table values are 4.4 bits, 0.0 .. 9.3057 +// e.g. pow10(31/32) = 9.3057 x16 = 149 +static const uint8 kPow10As4dot4[32] = { + 16, 17, 18, 20, 21, 23, 25, 26, + 28, 31, 33, 35, 38, 41, 44, 47, + 51, 54, 58, 63, 67, 73, 78, 84, + 90, 97, 104, 112, 120, 129, 139, 149 +}; + +inline uint8 min_uint8(uint8 a, uint8 b) {return (a < b) ? a : b;} +inline uint8 max_uint8(uint8 a, uint8 b) {return (a > b) ? 
a : b;} + +// Quick hash of 24 byte string into low 16 bits of uint64 +// Constants from murmur3 +inline uint16 Hash16(const char* str) { + const uint64* str64 = reinterpret_cast(str); + uint64 hash = (str64[0] * 0xff51afd7ed558ccdLLU) + + (str64[1] * 0xc4ceb9fe1a85ec53LLU) + + (str64[2] * 0xff51afd7ed558ccdLLU); + hash ^= (hash >> 32); + hash ^= (hash >> 16); + uint16 hash16 = hash; // Truncates to 16 bits + return hash16; +} + +// Return log base 10 of val as a 3.5 fixed-point byte. +// log10 as 3.5 bits 0.0 .. 7.31 (7.96875) = 1.0 .. 93,057,204 in steps of 1.0746x +uint8 Log10As3dot5(uint32 val) { + if (val > 93057204) {return 255;} + // Table lookup of divisor could be faster, but multiple divides cannot happen often + // (<400 per second when over 2550 usec each) + uint8 n = 0; + while (val > 2550) {val /= 100; n += 2*32;} + if (val > 255) {val /= 10; n += 1*32;} + return n + kLog10As3dot5[val]; // We trade up to 4 cache lines for no conditional branches here. +} + +// Input is xxx.yyyyy as eight bits in a single byte +// Return (10 ** xxx) * (10 ** 0.yyyyy) +// Smallest possible non-zero value is Log10byteToFloat(1) ==> 1.06250 +// Largest possible value is Log10byteToFloat(255) ==> 93125000.0 +float Log10byteToFloat(uint8 xxxyyyyy) { + if (xxxyyyyy == 0) {return 0.0;} + float retval = 1.0; + int xxx = xxxyyyyy >> 5; + int yyyyy = xxxyyyyy & 0x1F; + while (xxx > 0) {retval *= 10.0; --xxx;} + return retval * (kPow10As4dot4[yyyyy] / 16.0); +} + +int Log10byteToInt(uint8 xxxyyyyy) { + int retval = roundf(Log10byteToFloat(xxxyyyyy)); + return retval; +} + + + +// Called infrequently, so not performance critical +inline uint64 GetField(uint64 counts, int i) { + return (counts & kBucketField[i]) >> kBucketStartBit[i]; +} + +// Called infrequently, so not performance critical +void UnpackCounts(const FancyLock2::CheapHist2* ch, uint32* bucketcounts) { + for (int i = 0; i < 8; ++i) { + bucketcounts[i] = GetField(ch->counts, i); + bucketcounts[i] += 
GetField(ch->counts_hi, i) << kBucketWidthBits[i]; + } +} + +// Return percentile fractional location in 8 buckets as a 3.5 fixed-point byte. +// E.g. for bucketcounts[8] = {10, 20, 10, 8, 8, 0, 0, 0} 90th percentile 50.4 of 56 +// is bucket[4.25] x32 = 136 +// +// We map into a linear fraction and do the log mapping on the way out +// If we use half the counts in a 10x bucket, assume it lands at 10 ** 0.5 = 3.16, +// not at 10/2 = 5. This is a better match to likely declining tail above 80th %ile +// 0.1 ==> 1.26, 0.5 ==> 3.16, 0.9 ==> 7.94 in 1..10 bucket +// Averaging bucket fractions then 10**x will produce geometric mean, not arithmetic +// +// Called infrequently, so not performance critical +// +// Percentile ranges from 0.0 to 1.0, not 0-100 +uint8 FindPercentileBucket(float percentile, const uint32* bucketcounts, + const FancyLock2::CheapHist2* ch) { + uint32 totalcount = 0; + for (int i = 0; i < 8; ++i) {totalcount += bucketcounts[i];} + if (totalcount == 0) {return 0;} + if (percentile <= 0.0001) {return ch->hmin;} + if (percentile > 0.9999) {return ch->hmax;} + + float goalcount = totalcount * percentile; + + // We want bucket.fraction that gives + // goalcount <= running counts up to and including that bucket + float runningcount = 0; + int k = 0; + while ((runningcount + bucketcounts[k]) < goalcount) { + runningcount += bucketcounts[k++]; // Completely include bucket k + } + // At this point, + // runningcount thru bucket k-1 is < goalcount and + // runningcount + bucketcounts[k] >= goalcount and + // k is in 0..7 + // We want to find the closest fraction of bucket k that approximates + // the percentile value. 
+ // Most buckets have 32 choices of fraction, 0..31, but the first and last + // buckets are bounded by the hmin and hmax log values seen + float remainder = goalcount - runningcount; + uint8 lo = max_uint8(k * 32, ch->hmin) & 31; + uint8 hi = min_uint8(k * 32 + 31, ch->hmax) & 31; + // Interpolate, assuming total items in bucket are uniformly distributed in fractions [lo..hi) + float fraction = (hi + 1 - lo) * (remainder / bucketcounts[k]); + int ifraction = fraction; // truncates + // Note: if we need all of topmost bucket, sum can be 256. Return 255 in that case. + int retval = k * 32 + (lo + ifraction); + if (retval > 255) {retval = 255;} + return retval; // Truncates to 8 bits +} + +// Calc 90th percentile from histogram counts and then zero histogram +// Called infrequently, so not performance critical +int Calc90ile(const FancyLock2::CheapHist2* ch) { + if ((ch->counts == 0) && (ch->counts_hi == 0)) {return 0;} // We never started + uint32 bucketcounts[8]; + UnpackCounts(ch, bucketcounts); + uint8 percentile90 = FindPercentileBucket(0.90, bucketcounts, ch); + return Log10byteToInt(percentile90); +} + +void DumpCheapHist2(FILE* f, FancyLock2::CheapHist2* ch) { + uint32 bucketcounts[8]; + UnpackCounts(ch, bucketcounts); + uint32 sum = 0; + + fprintf(f, " 1us 10 100 1ms 10 100 1s 10\n"); + fprintf(f, " ["); + for (int i = 0; i < 8; ++i) { + fprintf(f, "%u ", bucketcounts[i]); + if ((i % 3) == 2) {fprintf(f, " ");} + sum += bucketcounts[i]; + } + fprintf(f, "] sum = %u\n", sum); + + fprintf(f, " Minimum %5d us\n", Log10byteToInt(ch->hmin)); + fprintf(f, " Maximum %5d us\n", Log10byteToInt(ch->hmax)); + fprintf(f, " 90th %%ile %5d us\n", Calc90ile(ch)); + fprintf(f, " Expected %5d us\n", Log10byteToInt(ch->expected)); +} + +void DumpFancyLock2Struct(FILE* f, FancyLock2::FancyLock2Struct* fl) { + fprintf(f, " Struct %s [%04x] %08x %08x\n", + fl->filename, fl->lnamehash, fl->lock, fl->waiters); + DumpCheapHist2(f, &fl->wait); + fprintf(f, "\n"); +} + +void 
DumpBuckets(FILE* f, FancyLock2::CheapHist2* ch) { + fprintf(f, "lo: "); + for (int i = 0; i < 8; ++i) { + int temp = GetField(ch->counts, i); + fprintf(f, "[%d]%d ", i, temp); + } + fprintf(f, " "); + fprintf(f, "hi: "); + for (int i = 0; i < 8; ++i) { + int temp = GetField(ch->counts_hi, i); + fprintf(f, "[%d]%d ", i, temp); + } + fprintf(f, "\n"); +} + +// We just incremented bucket bkt and it overflowed. +// First subtract back to kill carry into next bucket, then +// zero this bucket and increment counts_hi bucket. +// If that overflows, halve all the counts (exponential decay over minutes to hours) +// Called infrequently; not performance critical +void Overflow(FancyLock2::CheapHist2* ch, int bkt) { +//fprintf(stderr, "Overflow carry[%d]\n", bkt); + // Correct the overflow increment + ch->counts -= kBucketIncr[bkt]; // Take out the increment + ch->counts &= ~kBucketField[bkt]; // Zero the field + ch->counts_hi += kBucketIncr[bkt]; // Carry into high bits + if ((ch->counts_hi & kBucketField[bkt]) == 0) { +fprintf(stderr, "\nOverflow[%d] halving the counts\n", bkt); + // Correct the hi overflow increment + ch->counts_hi -= kBucketIncr[bkt]; // Take out the increment + ch->counts_hi &= ~kBucketField[bkt]; // Zero the field + // Halve all the low counts + ch->counts &= ~kBucketAllLow; // Zero low bit of each bucket + ch->counts >>= 1; // Halve low part + // Move low bits of high half to high bits of low half + for (int i = 0; i < 8; ++i) { + if ((ch->counts_hi & kBucketIncr[i]) != 0) { + ch->counts |= kBucketHigh[i]; + } + } + ch->counts_hi &= ~kBucketAllLow; // Zero low bit of each bucket + ch->counts_hi >>= 1; // Halve high part + ch->counts_hi |= kBucketHigh[bkt]; // After shift, overflowed bucket = 1000... 
+//DumpBuckets(stderr, ch); +fprintf(stderr, "after "); DumpCheapHist2(stderr, ch); + } +} + +// Binary search of 8 bucket maximums +int FindSubscr(uint32 val, const uint32* maxes) { + if (val <= maxes[3]) { + if (val <= maxes[1]) { + return (val <= maxes[0]) ? 0 : 1; + } else { + return (val <= maxes[2]) ? 2 : 3; + } + } else { + if (val <= maxes[5]) { + return (val <= maxes[4]) ? 4 : 5; + } else { + return (val <= maxes[6]) ? 6 : 7; + } + } +} + + +//---------------------------------------------------------------------------// +// Exported routines // +//---------------------------------------------------------------------------// + +// Constructor +// Last parameter allows distinctive name init in array of locks for 0 fancy2struct_.wait.hmax) { + fprintf(stderr, "[%s] zero entries\n", fancy2struct_.filename); + return; + } + + int i90ile = Calc90ile(&fancy2struct_.wait); + int expected = Log10byteToInt(fancy2struct_.wait.expected); + fprintf(stderr, "[%s]%s\n", fancy2struct_.filename, + (i90ile > expected) ? " ERROR: 90%ile > EXPECTED" : ""); + + DumpCheapHist2(stderr, &fancy2struct_.wait); +} + +// Export current 90th percentile acquire time (usec) +int FancyLock2::Get90ile() { + return Calc90ile(&fancy2struct_.wait); +} + +// Record waiting time and queue depth. Takes about 10-15 nsec on Intel i3 7100. 
+// Called fairly frequently +void FancyLock2::IncrCounts(uint32 wait_us) { + //VERYTEMP + //fprintf(stdout, "[%s] IncrCounts(%dus, %dq)\n", + // fancy2struct_.filename, wait_us, queue_depth); + + // Remember min and max values + uint8 waitbyte = Log10As3dot5(wait_us); + fancy2struct_.wait.hmin = min_uint8(fancy2struct_.wait.hmin, waitbyte); + fancy2struct_.wait.hmax = max_uint8(fancy2struct_.wait.hmax, waitbyte); + // Increment wait histogram, bucket number bkt + int bkt = FindSubscr(wait_us , kWaitMaxes); + fancy2struct_.wait.counts += kBucketIncr[bkt]; + // If field is full, overflow into count_hi + if ((fancy2struct_.wait.counts & kBucketField[bkt]) == 0) { + Overflow(&fancy2struct_.wait, bkt); + } + +} + diff --git a/book-user-code/fancylock2.h b/book-user-code/fancylock2.h new file mode 100644 index 000000000000..77f85a3c2ae5 --- /dev/null +++ b/book-user-code/fancylock2.h @@ -0,0 +1,84 @@ +// fancylock2.h +// +// This defines a software lock that includes some statistics and some identification +// +// Copyright 2021 Richard L. 
Sites + +#ifndef __FANCYLOCK2_H__ +#define __FANCYLOCK2_H__ + +#include "basetypes.h" + +#define CACHEALIGNED __attribute__((aligned(64))) + +#define DEFINE_FANCYLOCK2(name, expected_wait_usec) \ + FancyLock2 name(__FILE__, __LINE__, expected_wait_usec) + +// +// Fancylock2 (64 bytes, cacheline aligned) +// +// +-------+-------+-------+-------+-------+-------+-------+-------+ +// 0 | lock | waiters | +// +-------+-------+-------+-------+-------+-------+-------+-------+ +// 8 | wait.counts | +// +-------+-------+-------+-------+-------+-------+-------+-------+ +// 16 | wait.counts_hi | +// +-------+-------+-------+-------+-------+-------+-------+-------+ +// 24 | hmin | hmax |expec'd| ///// | holder | +// +-------+-------+-------+-------+-------+-------+-------+-------+ +// 32 | ///////////////////////////// | ///////////////////////////// | | +// +-------+-------+-------+-------+-------+-------+-------+-------+ +// 40 | lnamehash | filename +// +-------+-------+-------+-------+-------+-------+-------+-------+ +// 48 | filename | +// +-------+-------+-------+-------+-------+-------+-------+-------+ +// 56 | filename:line | +// +-------+-------+-------+-------+-------+-------+-------+-------+ +// + +// The constructor initializes a lock variable with declared filename and line# +// The destructor prints contended acquisition time stats +class FancyLock2 { + public: + typedef struct { // Exactly 20 bytes + uint64 counts; // This has 8 different power-of-N count buckets bitpacked + uint64 counts_hi; // High-order bits of counts, 8 buckets bitpacked + uint8 hmin; // minimum log10 value seen, as 3.5 bits + uint8 hmax; // maximum log10 value seen, as 3.5 bits + uint8 expected; // Expected log10 value, as 3.5 bits + uint8 pad; // + } CheapHist2; + + // We want this to exactly fill one 64-byte cache line and not be split across two. 
+ // Filename:linenum is the source file/line where this lock is declared + typedef struct { + volatile uint32 lock; // [0] 0 = unlocked, 1 = locked + uint32 waiters; // [4] 0 = no waiters, >0 = N waiters + CheapHist2 wait; // [8] + int32 holder; // [28] +ID of lock holder if uncontended acquire + // -ID of lockholder if contended acquire + // 0x80000000 if no holder + uint32 padding[2]; // [32] + uint16 lnamehash; // [40] Hash(filename) + char filename[22]; // [42] file suffix:linenum plus NUL + } FancyLock2Struct; + + FancyLock2(const char* filename, const int linenum, + const int expected_wait_usec, const int subline = 0); + ~FancyLock2(); + + // Export current 90th percentile acquire time (usec) + int Get90ile(); + + // Record waiting time and queue depth + void IncrCounts(uint32 wait_us); + + // The only data + FancyLock2Struct CACHEALIGNED fancy2struct_; +}; + +void UnpackCounts(uint64 counts, uint32* bucketcounts); + + +#endif // __FANCYLOCK2_H__ + diff --git a/book-user-code/fdiv_hog.cc b/book-user-code/fdiv_hog.cc new file mode 100644 index 000000000000..dc0eb3bd27eb --- /dev/null +++ b/book-user-code/fdiv_hog.cc @@ -0,0 +1,75 @@ +// Sample mystery program to measure how long an FDIV takes. +// Runs on/off ~four times per second for a minute +// Copyright 2021 Richard L. Sites +// +// kutrace version. This does not start or stop tracing, so can run multiple ones +// +// Usage: fdiv_hog [n] +// n msec between iterations. 
Defaults to 200 +// Compile with +// g++ -O2 fdiv_hog.cc kutrace_lib.cc -o fdiv_hog +// +// Postprocess with +// cat /tmp/fdiv101.trace |./rawtoevent |sort -n |./eventtospan "fdiv101" |sort |./spantotrim 0 |./spantospan 0 >/home/public/time_fdiv101.json + +#include +#include +#include +#include +#include +#include + +#include "kutrace_lib.h" + +static const int kIterations = 1000 * 1000 * 1; // About 5 msec + +static int msec_wait = 200; + +// Sleep for n milliseconds +void msleep(int msec) { + struct timespec ts; + ts.tv_sec = msec / 1000; + ts.tv_nsec = (msec % 1000) * 1000000; + nanosleep(&ts, NULL); +} + +double DoIterations(int n, double start_divd) { + double divd = start_divd; + for (int i = 0; i < n; ++i) { + // Mark every 4096 iterations, so we can see how time changes + // Also reset dividend so we don't underflow + if ((i & 0x0fff) == 0) { + kutrace::mark_d(i >> 10); + divd = start_divd; + } + divd /= 1.000001; + divd /= 1.000000001; + } + return divd; +} + + +int main (int argc, const char** argv) { + if (1 < argc) {msec_wait = atoi(argv[1]);} + + uint64_t startcy, stopcy; + + double divd = 123456789.0; + startcy = __rdtsc(); + divd = DoIterations(kIterations, divd); + stopcy = __rdtsc(); + int64_t elapsed = stopcy - startcy; // Signed to avoid compiled code for negative unsigned + double felapsed = elapsed; + + + // Run for 1 minute approximately + for (int i = 0; i < 60*4; ++i) { + divd = DoIterations(kIterations * 10, divd); + msleep(msec_wait); + } + + fprintf(stdout, "%d iterations, %lu cycles, %4.2f cycles/iteration\n", + kIterations, elapsed, felapsed / kIterations); + fprintf(stdout, "%f\n", divd); // Make divd live + return 0; +} diff --git a/book-user-code/flt_hog.cc b/book-user-code/flt_hog.cc new file mode 100644 index 000000000000..ae5185722099 --- /dev/null +++ b/book-user-code/flt_hog.cc @@ -0,0 +1,105 @@ +// Sample mystery program to load up floating-point execution units. 
+// Runs on/off ~four times per second for a minute +// Copyright 2021 Richard L. Sites +// +// kutrace version. This does not start or stop tracing, so can run multiple ones +// +// Usage: flt_hog [n] +// n msec between iterations. Defaults to 20 +// Compile with +// g++ -O2 flt_hog.cc kutrace_lib.cc -o flt_hog +// + +#include +#include +#include +#include +#include + +#include "kutrace_lib.h" +#include "timecounters.h" + +static const int kIterations = 1000 * 1000 * 1; // About 10 msec + +static int msec_wait = 20; + +// Sleep for n milliseconds +void msleep(int msec) { + struct timespec ts; + ts.tv_sec = msec / 1000; + ts.tv_nsec = (msec % 1000) * 1000000; + nanosleep(&ts, NULL); +} + +double DoIterations(int n, double start_divd) { + double divd1 = start_divd; + double divd2 = start_divd; + double prod1 = start_divd; + double prod2 = start_divd; + double sum1 = 0.0; + double sum2 = 0.0; + for (int i = 0; i < n; ++i) { + // Mark every 4096 iterations, so we can see how time changes + if ((i & 0x0fff) == 0) { + kutrace::mark_d(i >> 10); + } + sum1 += prod1; + sum2 += divd1; + prod1 *= 1.000000001; + divd1 /= 1.000000001; + sum1 -= prod2; + sum2 -= divd2; + prod2 *= 0.999999999; + divd2 /= 0.999999999; + } + return divd1 + prod1 + divd2 + prod2 + sum1 + sum2; +} + +double DoIterations2(int n, double start_divd) { + double divd1 = start_divd; + double divd2 = start_divd; + double divd3 = start_divd; + double divd4 = start_divd; + for (int i = 0; i < n; ++i) { + // Mark every 4096 iterations, so we can see how time changes + if ((i & 0x0fff) == 0) { + kutrace::mark_d(i >> 10); + } + divd1 /= 1.000000001; + divd2 /= 0.999999999; + divd3 /= 1.000000002; + divd4 /= 0.999999998; + + divd1 /= 0.999999999; + divd2 /= 1.000000001; + divd3 /= 0.999999998; + divd4 /= 1.000000002; + } + return divd1 + divd2 + divd3 + divd4; +} + + +int main (int argc, const char** argv) { + if (1 < argc) {msec_wait = atoi(argv[1]);} + + uint64_t startcy, stopcy; + + double divd = 
123456789.0; + startcy = GetCycles(); + divd = DoIterations2(kIterations, divd); + stopcy = GetCycles(); + int64_t elapsed = stopcy - startcy; // Signed to avoid compiled code for negative unsigned + double felapsed = elapsed; + + + // Run for 1 minute approximately if 20ms wait + for (int i = 0; i < 60*30; ++i) { + divd = DoIterations2(kIterations * 2, divd); + msleep(msec_wait); + } + + fprintf(stdout, "%d iterations, %lu cycles, %4.2f cycles/iteration\n", + kIterations, elapsed, felapsed / kIterations); + fprintf(stdout, "%f\n", divd); // Make divd live + return 0; +} diff --git a/book-user-code/from_base40.cc b/book-user-code/from_base40.cc new file mode 100644 index 000000000000..091f26ac32a7 --- /dev/null +++ b/book-user-code/from_base40.cc @@ -0,0 +1,29 @@ +// Little program to convert from base40 characters +// Copyright 2021 Richard L. Sites + +#include +#include "basetypes.h" + + +static const char kFromBase40[40] = { + '\0','a','b','c', 'd','e','f','g', 'h','i','j','k', 'l','m','n','o', + 'p','q','r','s', 't','u','v','w', 'x','y','z','0', '1','2','3','4', + '5','6','7','8', '9','-','.','/', +}; + +// Unpack six characters from 32 bits. +// str must be 8 bytes. We somewhat-arbitrarily capitalize the first letter +char* Base40ToChar(uint64 base40, char* str) { + base40 &= 0x00000000fffffffflu; // Just low 32 bits + memset(str, 0, 8); + // First character went in last, comes out first + int i = 0; + while (base40 > 0) { + uint64 n40 = base40 % 40; + str[i] = kFromBase40[n40]; + base40 /= 40; + ++i; + } + return str; +} + diff --git a/book-user-code/from_base40.h b/book-user-code/from_base40.h new file mode 100644 index 000000000000..9b2f335c71a4 --- /dev/null +++ b/book-user-code/from_base40.h @@ -0,0 +1,15 @@ +// from_base40.h +// +// This decodes base40 +// +// Copyright 2021 Richard L. Sites + +#ifndef __FROM_BASE40_H__ +#define __FROM_BASE40_H__ + +// Unpack six characters from 32 bits. +// str must be 8 bytes. 
We somewhat-arbitrarily capitalize the first letter +char* Base40ToChar(uint64 base40, char* str); + +#endif // __FROM_BASE40_H__ + diff --git a/book-user-code/hello_world_trace.c b/book-user-code/hello_world_trace.c new file mode 100644 index 000000000000..71ca6c8661fb --- /dev/null +++ b/book-user-code/hello_world_trace.c @@ -0,0 +1,12 @@ +// Compile with g++ -O2 hello_world_trace.c kutrace_lib.cc -o hello_world_trace + +#include + +#include "kutrace_lib.h" + +int main (int argc, const char** argv) { + kutrace::mark_a("hello"); + fprintf(stdout, "hello world\n"); + kutrace::mark_a("/hello"); + return 0; +} diff --git a/book-user-code/kutrace_control.cc b/book-user-code/kutrace_control.cc new file mode 100644 index 000000000000..94237b74b034 --- /dev/null +++ b/book-user-code/kutrace_control.cc @@ -0,0 +1,226 @@ +// Little program to control KUtrace +// Copyright 2021 Richard L. Sites +// +// 2017.11.16 dsites Updated to include instructions per cycle IPC flag +// 2018.05.08 dsites Updated by switching to using kutrace_lib +// 2019.02.19 dsites Updated ... 
+// 2020.04.03 dsites Added wait command +// +// This program reads commands from stdin +// +// Compile with gcc -O2 kutrace_control.cc kutrace_lib.cc -o kutrace_control + +#include +#include +#include + +#include // nanosleep +#include // getpid gethostname +//#include + +#include // gettimeofday +#include + +#include "basetypes.h" +#include "kutrace_control_names.h" +#include "kutrace_lib.h" + +/* +TODO: +Rationalize the use of side-effect-free DoTest +*/ + +/* Outgoing arg to DoReset */ +#define DO_IPC 1 +#define DO_WRAP 2 + +typedef uint64 u64; +typedef int64 s64; + +/* For the flags byte in traceblock[1] */ +#define IPC_Flag 0x80ul +#define WRAP_Flag 0x40ul +#define Unused2_Flag 0x20ul +#define Unused1_Flag 0x10ul +#define VERSION_MASK 0x0Ful + +// Module must be at least this version number for us to run +static const u64 kMinModuleVersionNumber = 3; + +// Number of u64 values per trace block (64KB total) +static const int kTraceBufSize = 8192; + +// Number of u64 values per IPC block, one u8 per u64 in trace buf (8KB total) +static const int kIpcBufSize = kTraceBufSize >> 3; + + +// Very first block layout (x86 cycle counter is rdtsc >> 6) +// +-------+-----------------------+-------------------------------+ +// | cpu# | cycle counter | 0 module +// +-------+-----------------------+-------------------------------+ +// | flags | gettimeofday | 1 DoDump +// +-------+-----------------------+-------------------------------+ +// | start cycle counter | 2 DoDump +// +-------------------------------+-------------------------------+ +// | start gettimeofday | 3 DoDump +// +-------------------------------+-------------------------------+ +// | stop cycle counter | 4 DoDump +// +-------------------------------+-------------------------------+ +// | stop gettimeofday | 5 DoDump +// +-------------------------------+-------------------------------+ +// | u n u s e d | 6 +// +-------------------------------+-------------------------------+ +// | u n u s e d | 7 +// 
+-------------------------------+-------------------------------+ +// +-------------------------------+-------------------------------+ +// | u n u s e d | PID | 8 module +// +-------------------------------+-------------------------------+ +// | u n u s e d | 9 module +// +-------------------------------+-------------------------------+ +// | | 10 module +// + pidname + +// | | 11 module +// +-------------------------------+-------------------------------+ +// | followed by trace entries... | +// ~ ~ +// +// +// All other blocks layout +// +-------+-----------------------+-------------------------------+ +// | cpu# | cycle counter | 0 module +// +-------+-----------------------+-------------------------------+ +// | flags | gettimeofday | 1 DoDump +// +-------+-----------------------+-------------------------------+ +// | u n u s e d | PID | 2 module +// +-------------------------------+-------------------------------+ +// | u n u s e d | 3 module +// +-------------------------------+-------------------------------+ +// | | 4 module +// + pidname + +// | | 5 module +// +-------------------------------+-------------------------------+ +// | followed by trace entries... | +// ~ ~ +// + + + +void Usage() { + fprintf(stderr, "usage: kutrace_control, with sysin lines\n"); + fprintf(stderr, " go, stop [], init, on, off, flush, reset, stat, dump, quit, wait \n"); + exit(0); +} + +// Sleep for n milliseconds +void msleep(int msec) { + struct timespec ts; + ts.tv_sec = msec / 1000; + ts.tv_nsec = (msec % 1000) * 1000000; + nanosleep(&ts, NULL); +} + + +static const int kMaxBufferSize = 256; + +// Read next line, stripping any crlf. Return false if no more. 
+bool ReadLine(FILE* f, char* buffer, int maxsize) { + char* s = fgets(buffer, maxsize, f); + if (s == NULL) {return false;} + int len = strlen(s); + // Strip any crlf or cr or lf + if (s[len - 1] == '\n') {s[--len] = '\0';} + if (s[len - 1] == '\r') {s[--len] = '\0';} + return true; +} + +// Take a series of commands from stdin +// +// go|goipc|goipcwrap|gowrap +// stop +// init Initialize trace buffer with syscall/irq/trap names +// on Turn on tracing +// off Turn off tracing +// flush With tracing off, zero out the rest of each partly-used traceblock +// reset Set up for a new tracing run +// stat Show some sort of tracing status +// dump Dump the trace buffer to constructed filename +// quit Exit this program +// wait N Wait N seconds +// +// Command-line argument -force ignores any other running tracing and turns it off +// +int main (int argc, const char** argv) { + if ((argc > 1) && (strcmp(argv[1], "-force") == 0)) { + kutrace::DoControl(KUTRACE_CMD_RESET, 0); + } else { + if (!kutrace::TestModule()) { + return 0; + } + } + + u64 control_flags = 0; + // Added: if argv[1] is 1, do "go" and exit with tracing on + // if argv[1] is 0, do "stop" and exit with tracing off + + char fname[256]; + kutrace::MakeTraceFileName("ku", fname); + + if (argc > 1) { + if (strcmp(argv[1], "1") == 0) { + kutrace::DoReset(control_flags); kutrace::DoInit(argv[0]); kutrace::DoOn(); + return 0; + } + if (strcmp(argv[1], "0") == 0) { + /* After DoOff wait 20 msec for any pending tracing to finish */ + kutrace::DoOff(); msleep(20); kutrace::DoFlush(); kutrace::DoDump(fname); kutrace::DoQuit(); + return 0; + } + } + + + // Avoid always reseting so we can possibly start this program with tracing on + char buffer[kMaxBufferSize]; + fprintf(stdout, "control> "); + fflush(stdout); + while (ReadLine(stdin, buffer, kMaxBufferSize)) { + if (buffer[0] == '\0') {kutrace::DoStat(control_flags);} + else if (strcmp(buffer, "init") == 0) {kutrace::DoInit(argv[0]);} + else if (strcmp(buffer, 
"test") == 0) {kutrace::DoTest();} + else if (strcmp(buffer, "on") == 0) {kutrace::DoOn();} + else if (strcmp(buffer, "off") == 0) {kutrace::DoOff(); msleep(20);} + else if (strcmp(buffer, "flush") == 0) {kutrace::DoFlush();} + else if (strcmp(buffer, "reset") == 0) {kutrace::DoReset(control_flags);} + else if (strcmp(buffer, "stat") == 0) {kutrace::DoStat(control_flags);} + else if (strcmp(buffer, "dump") == 0) {kutrace::DoDump(fname);} + else if (strcmp(buffer, "go") == 0) { + control_flags = 0; kutrace::DoReset(control_flags); kutrace::DoInit(argv[0]); kutrace::DoOn(); + } else if (strcmp(buffer, "goipc") == 0) { + control_flags |= DO_IPC; kutrace::DoReset(control_flags); kutrace::DoInit(argv[0]); kutrace::DoOn(); + } else if (strcmp(buffer, "gowrap") == 0) { + control_flags |= DO_WRAP; kutrace::DoReset(control_flags); kutrace::DoInit(argv[0]); kutrace::DoOn(); + } else if ((strcmp(buffer, "goipcwrap") == 0) || (strcmp(buffer, "gowrapipc") == 0)) { + control_flags |= (DO_IPC | DO_WRAP); kutrace::DoReset(control_flags); kutrace::DoInit(argv[0]); kutrace::DoOn(); + } else if (memcmp(buffer, "stop", 4) == 0) { + /* After DoOff wait 20 msec for any pending tracing to finish */ + /* Pick off filename if any */ + if (strlen(buffer) > 5) {strcpy(fname, buffer + 5);} + kutrace::DoOff(); msleep(20); kutrace::DoFlush(); kutrace::DoDump(fname); control_flags = 0; kutrace::DoQuit(); + } else if (strcmp(buffer, "quit") == 0) {kutrace::DoQuit();} + else if (strcmp(buffer, "exit") == 0) {kutrace::DoQuit();} + else if (memcmp(buffer, "wait ", 5) == 0) { + int n = atoi(buffer + 5); + msleep(n * 1000); + } else { + fprintf(stdout, "Not recognized '%s'\n", buffer); + fprintf(stdout, " go goipc stop init on off flush reset stat dump quit\n"); + } + + fprintf(stdout, "control> "); + fflush(stdout); + } + + return 0; +} + + + diff --git a/book-user-code/kutrace_control_names.h b/book-user-code/kutrace_control_names.h new file mode 100644 index 000000000000..d9dd2b9265d4 --- /dev/null 
+++ b/book-user-code/kutrace_control_names.h @@ -0,0 +1,34 @@ +// Names for syscall, etc. in dclab_tracing +// Copyright 2021 Richard L. Sites +// +// These are from linux-4.19.19 x86 AMD 64-bit. Others will vary. +// + +#ifndef __KUTRACE_CONTROL_NAMES_H__ +#define __KUTRACE_CONTROL_NAMES_H__ + +/* Add others as you find and test them */ +#define Isx86_64 defined(__x86_64) +#define IsAmd_64 Isx86_64 && defined(__znver1) +#define IsIntel_64 Isx86_64 && !defined(__znver1) + +#define IsArm_64 defined(__aarch64__) +#define IsRPi4 defined(__ARM_ARCH) && (__ARM_ARCH == 8) +#define IsRPi4_64 IsRPi4 && IsArm_64 + +#if IsAmd_64 +#include "kutrace_control_names_ryzen.h" + +#elif IsIntel_64 +#include "kutrace_control_names_i3.h" + +#elif IsRPi4_64 +#include "kutrace_control_names_rpi4.h" + +#else +#error Need control_names for your architecture +#endif + +#endif // __KUTRACE_CONTROL_NAMES_H__ + + diff --git a/book-user-code/kutrace_control_names_i3.h b/book-user-code/kutrace_control_names_i3.h new file mode 100644 index 000000000000..474cdd0fd3a7 --- /dev/null +++ b/book-user-code/kutrace_control_names_i3.h @@ -0,0 +1,527 @@ +// Names for syscall, etc. in dclab_tracing +// Copyright 2021 Richard L. Sites +// +// These are from linux-4.19.19 x86 AMD 64-bit. Others will vary. +// + +#ifndef __KUTRACE_CONTROL_NAMES_I3_H__ +#define __KUTRACE_CONTROL_NAMES_I3_H__ + +#include "kutrace_lib.h" + +// Get rid of this +static const char* CpuFamilyModelManuf = "23 17 AMD"; + +static const NumNamePair PidNames[] = { + {0, "-idle-"}, + {-1, NULL}, // Must be last +}; + +static const NumNamePair Syscall32Names[] = { + {511, "-sched-"}, // Fake last syscall. 
Indicates where __schedule runs + {-1, NULL}, // Must be last +}; + +static const NumNamePair Syscall64Names[] = { + {0, "read"}, + {1, "write"}, + {2, "open"}, + {3, "close"}, + {4, "stat"}, + {5, "fstat"}, + {6, "lstat"}, + {7, "poll"}, + {8, "lseek"}, + {9, "mmap"}, + {10, "mprotect"}, + {11, "munmap"}, + {12, "brk"}, + {13, "rt_sigaction"}, + {14, "rt_sigprocmask"}, + {15, "rt_sigreturn"}, + {16, "ioctl"}, + {17, "pread64"}, + {18, "pwrite64"}, + {19, "readv"}, + {20, "writev"}, + {21, "access"}, + {22, "pipe"}, + {23, "select"}, + {24, "sched_yield"}, + {25, "mremap"}, + {26, "msync"}, + {27, "mincore"}, + {28, "madvise"}, + {29, "shmget"}, + {30, "shmat"}, + {31, "shmctl"}, + {32, "dup"}, + {33, "dup2"}, + {34, "pause"}, + {35, "nanosleep"}, + {36, "getitimer"}, + {37, "alarm"}, + {38, "setitimer"}, + {39, "getpid"}, + {40, "sendfile"}, + {41, "socket"}, + {42, "connect"}, + {43, "accept"}, + {44, "sendto"}, + {45, "recvfrom"}, + {46, "sendmsg"}, + {47, "recvmsg"}, + {48, "shutdown"}, + {49, "bind"}, + {50, "listen"}, + {51, "getsockname"}, + {52, "getpeername"}, + {53, "socketpair"}, + {54, "setsockopt"}, + {55, "getsockopt"}, + {56, "clone"}, + {57, "fork"}, + {58, "vfork"}, + {59, "execve"}, + {60, "exit"}, + {61, "wait4"}, + {62, "kill"}, + {63, "uname"}, + {64, "semget"}, + {65, "semop"}, + {66, "semctl"}, + {67, "shmdt"}, + {68, "msgget"}, + {69, "msgsnd"}, + {70, "msgrcv"}, + {71, "msgctl"}, + {72, "fcntl"}, + {73, "flock"}, + {74, "fsync"}, + {75, "fdatasync"}, + {76, "truncate"}, + {77, "ftruncate"}, + {78, "getdents"}, + {79, "getcwd"}, + {80, "chdir"}, + {81, "fchdir"}, + {82, "rename"}, + {83, "mkdir"}, + {84, "rmdir"}, + {85, "creat"}, + {86, "link"}, + {87, "unlink"}, + {88, "symlink"}, + {89, "readlink"}, + {90, "chmod"}, + {91, "fchmod"}, + {92, "chown"}, + {93, "fchown"}, + {94, "lchown"}, + {95, "umask"}, + {96, "gettimeofday"}, + {97, "getrlimit"}, + {98, "getrusage"}, + {99, "sysinfo"}, + {100, "times"}, + {101, "ptrace"}, + {102, 
"getuid"}, + {103, "syslog"}, + {104, "getgid"}, + {105, "setuid"}, + {106, "setgid"}, + {107, "geteuid"}, + {108, "getegid"}, + {109, "setpgid"}, + {110, "getppid"}, + {111, "getpgrp"}, + {112, "setsid"}, + {113, "setreuid"}, + {114, "setregid"}, + {115, "getgroups"}, + {116, "setgroups"}, + {117, "setresuid"}, + {118, "getresuid"}, + {119, "setresgid"}, + {120, "getresgid"}, + {121, "getpgid"}, + {122, "setfsuid"}, + {123, "setfsgid"}, + {124, "getsid"}, + {125, "capget"}, + {126, "capset"}, + {127, "rt_sigpending"}, + {128, "rt_sigtimedwait"}, + {129, "rt_sigqueueinfo"}, + {130, "rt_sigsuspend"}, + {131, "sigaltstack"}, + {132, "utime"}, + {133, "mknod"}, + {134, "uselib"}, + {135, "personality"}, + {136, "ustat"}, + {137, "statfs"}, + {138, "fstatfs"}, + {139, "sysfs"}, + {140, "getpriority"}, + {141, "setpriority"}, + {142, "sched_setparam"}, + {143, "sched_getparam"}, + {144, "sched_setscheduler"}, + {145, "sched_getscheduler"}, + {146, "sched_get_priority_max"}, + {147, "sched_get_priority_min"}, + {148, "sched_rr_get_interval"}, + {149, "mlock"}, + {150, "munlock"}, + {151, "mlockall"}, + {152, "munlockall"}, + {153, "vhangup"}, + {154, "modify_ldt"}, + {155, "pivot_root"}, + {156, "_sysctl"}, + {157, "prctl"}, + {158, "arch_prctl"}, + {159, "adjtimex"}, + {160, "setrlimit"}, + {161, "chroot"}, + {162, "sync"}, + {163, "acct"}, + {164, "settimeofday"}, + {165, "mount"}, + {166, "umount2"}, + {167, "swapon"}, + {168, "swapoff"}, + {169, "reboot"}, + {170, "sethostname"}, + {171, "setdomainname"}, + {172, "iopl"}, + {173, "ioperm"}, + {174, "create_module"}, + {175, "init_module"}, + {176, "delete_module"}, + {177, "get_kernel_syms"}, + {178, "query_module"}, + {179, "quotactl"}, + {180, "nfsservctl"}, + {181, "getpmsg"}, + {182, "putpmsg"}, + {183, "afs_syscall"}, + {184, "tuxcall"}, + {185, "security"}, + {186, "gettid"}, + {187, "readahead"}, + {188, "setxattr"}, + {189, "lsetxattr"}, + {190, "fsetxattr"}, + {191, "getxattr"}, + {192, "lgetxattr"}, + {193, 
"fgetxattr"}, + {194, "listxattr"}, + {195, "llistxattr"}, + {196, "flistxattr"}, + {197, "removexattr"}, + {198, "lremovexattr"}, + {199, "fremovexattr"}, + {200, "tkill"}, + {201, "time"}, + {202, "futex"}, + {203, "sched_setaffinity"}, + {204, "sched_getaffinity"}, + {205, "set_thread_area"}, + {206, "io_setup"}, + {207, "io_destroy"}, + {208, "io_getevents"}, + {209, "io_submit"}, + {210, "io_cancel"}, + {211, "get_thread_area"}, + {212, "lookup_dcookie"}, + {213, "epoll_create"}, + {214, "epoll_ctl_old"}, + {215, "epoll_wait_old"}, + {216, "remap_file_pages"}, + {217, "getdents64"}, + {218, "set_tid_address"}, + {219, "restart_syscall"}, + {220, "semtimedop"}, + {221, "fadvise64"}, + {222, "timer_create"}, + {223, "timer_settime"}, + {224, "timer_gettime"}, + {225, "timer_getoverrun"}, + {226, "timer_delete"}, + {227, "clock_settime"}, + {228, "clock_gettime"}, + {229, "clock_getres"}, + {230, "clock_nanosleep"}, + {231, "exit_group"}, + {232, "epoll_wait"}, + {233, "epoll_ctl"}, + {234, "tgkill"}, + {235, "utimes"}, + {236, "vserver"}, + {237, "mbind"}, + {238, "set_mempolicy"}, + {239, "get_mempolicy"}, + {240, "mq_open"}, + {241, "mq_unlink"}, + {242, "mq_timedsend"}, + {243, "mq_timedreceive"}, + {244, "mq_notify"}, + {245, "mq_getsetattr"}, + {246, "kexec_load"}, + {247, "waitid"}, + {248, "add_key"}, + {249, "request_key"}, + {250, "keyctl"}, + {251, "ioprio_set"}, + {252, "ioprio_get"}, + {253, "inotify_init"}, + {254, "inotify_add_watch"}, + {255, "inotify_rm_watch"}, + {256, "migrate_pages"}, + {257, "openat"}, + {258, "mkdirat"}, + {259, "mknodat"}, + {260, "fchownat"}, + {261, "futimesat"}, + {262, "newfstatat"}, + {263, "unlinkat"}, + {264, "renameat"}, + {265, "linkat"}, + {266, "symlinkat"}, + {267, "readlinkat"}, + {268, "fchmodat"}, + {269, "faccessat"}, + {270, "pselect6"}, + {271, "ppoll"}, + {272, "unshare"}, + {273, "set_robust_list"}, + {274, "get_robust_list"}, + {275, "splice"}, + {276, "tee"}, + {277, "sync_file_range"}, + {278, 
"vmsplice"}, + {279, "move_pages"}, + {280, "utimensat"}, + {281, "epoll_pwait"}, + {282, "signalfd"}, + {283, "timerfd_create"}, + {284, "eventfd"}, + {285, "fallocate"}, + {286, "timerfd_settime"}, + {287, "timerfd_gettime"}, + {288, "accept4"}, + {289, "signalfd4"}, + {290, "eventfd2"}, + {291, "epoll_create1"}, + {292, "dup3"}, + {293, "pipe2"}, + {294, "inotify_init1"}, + {295, "preadv"}, + {296, "pwritev"}, + {297, "rt_tgsigqueueinfo"}, + {298, "perf_event_open"}, + {299, "recvmmsg"}, + {300, "fanotify_init"}, + {301, "fanotify_mark"}, + {302, "prlimit64"}, + {303, "name_to_handle_at"}, + {304, "open_by_handle_at"}, + {305, "clock_adjtime"}, + {306, "syncfs"}, + {307, "sendmmsg"}, + {308, "setns"}, + {309, "getcpu"}, + {310, "process_vm_readv"}, + {311, "process_vm_writev"}, + {312, "kcmp"}, + {313, "finit_module"}, + {314, "sched_setattr"}, + {315, "sched_getattr"}, + {316, "renameat2"}, + {317, "seccomp"}, + {318, "getrandom"}, + {319, "memfd_create"}, + {320, "kexec_file_load"}, + {321, "bpf"}, + {322, "execveat"}, + {323, "userfaultfd"}, + {324, "membarrier"}, + {325, "mlock2"}, + {511, "-sched-"}, // Fake last syscall. Indicates where __schedule runs + {-1, NULL}, // Must be last +}; + +// Based on arch/x86/include/asm/x86/irq_vectors.h +// 2017: arch/x86/include/asm/irq_vectors.h +// 2019: arch/x86/include/asm/irq_vectors.h +/* + * Vectors 0 ... 31 : system traps and exceptions - hardcoded events + * Vectors 32 ... 127 : device interrupts + * Vector 128 : legacy int80 syscall interface + * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts + * Vectors INVALIDATE_TLB_VECTOR_START ... 
255 : special interrupts
+ */
+
+static const NumNamePair IrqNames[] = {  // {number, name} pairs; the {-1, NULL} entry ends the list
+  // 2017 machines
+  {0, "timer"},  // timer
+  {1, "i8042_keyboard1"},  // keyboard/touchpad/mouse
+  {3, "int3"},  // int3
+  {8, "rtc0"},  // real-time clock chip
+  {9, "acpi"},
+  {12, "i8042_keyboard12"},  // keyboard/touchpad/mouse
+  {16, "usb1"},
+  {23, "usb2"},
+  {24, "i915_graphics"},  // graphics (NOTE(review): this comment used to say "usb" -- confirm)
+  {28, "enp2s0_eth0"},  // aka eth0
+  {29, "hda_29_inner"},  // disk
+  {30, "hda_30_inner"},  // disk
+  {31, "mei_me"},  // Management Engine Interface
+  //{29, "snd_hda_29"}, // audio ?? maybe 32/33 now
+  //{30, "snd_hda_30"}, // audio ??
+  {36, "hda_36"},
+
+  // 2017 for our particular machines dclab-1,2,3,4
+  {0x22, "eth0"},  // eth0
+  {0x42, "hda_29"},  // disk, forwarded to 29
+  {0x52, "hda_30"},  // disk, forwarded to 30
+  {0x62, "hda_31"},  // disk
+  {0xb1, "graphics"},  // ether?, forwards to 24, no return
+
+  // 2018 for our particular machines dclab-1,2,3
+  {0xb2, "eth0"},  // ethernet
+
+
+  {128, "int80"},
+
+  {129, "eth0"},
+
+  // 2019.03.05 Linux 4.19 Ryzen
+  //{0x21, "??"}, // 1/sec
+  {0x23, "eth0"},  // 18/sec
+  //{0x24, "??"}, // 129 as 64+64 5 sec
+  {0x25, "eth0"},  // aka eth0
+  {0x27, "sdb2"},  // aka disk
+
+  // {255, "spurious_apic"},
+  {255, "BH"},  // bottom half of an interrupt handler
+  {254, "error_apic_ipi"},
+  {253, "reschedule_ipi"},
+  {252, "call_func_ipi"},
+  {251, "call_func1_ipi"},
+  {250, "thermal_apic_ipi"},
+  {249, "threshold_apic_ipi"},
+  {248, "reboot_ipi"},
+  {247, "x86_platform_ipi"},
+  {246, "irq_work_ipi"},
+  {245, "uv_bau_message"},
+  {244, "deferred_error"},
+  {243, "hypervisor_callback"},
+  {242, "posted_intr"},
+  {241, "posted_intr_wakeup"},
+  {240, "posted_intr_nested"},
+  {239, "managed_irq_shutdown"},
+  {238, "hyperv_reenlighten"},
+  {237, "hyperv_stimer0"},
+  {236, "local_timer_vector"},  // event 0x05ec, decimal 1516 4.19 x86
+
+  {13, "fpu_irq"},
+
+  {-1, NULL},  // Must be last
+};
+
+// Export this to raw2event.cc. 0x05ec is the event number noted on the local_timer_vector entry above (vector 236 == 0xec)
+static const int kTIMER_IRQ_EVENT = 0x05ec;
+
+// Bottom half BH vectors, from include/linux/interrupt.h
+static const NumNamePair SoftIrqNames[] = {
+  {0, "HI_SOFTIRQ"},
+  {1, "TIMER_SOFTIRQ"},
+  {2, "NET_TX_SOFTIRQ"},
+  {3, "NET_RX_SOFTIRQ"},
+  {4, "BLOCK_SOFTIRQ"},
+  {5, "IRQ_POLL_SOFTIRQ"},
+  {6, "TASKLET_SOFTIRQ"},
+  {7, "SCHED_SOFTIRQ"},
+  {8, "HRTIMER_SOFTIRQ"},
+  {9, "RCU_SOFTIRQ"},
+
+  {-1, NULL},  // Must be last
+};
+
+static const NumNamePair TrapNames[] = {  // trap number -> name; numbering presumably follows the kernel's X86_TRAP_* values -- confirm
+  {0, "Divide-by-zero"},
+  {1, "Debug"},
+  {2, "Non-maskable_Interrupt"},
+  {3, "Breakpoint"},
+  {4, "Overflow"},
+  {5, "Bound_Range_Exceeded"},
+  {6, "Invalid_Opcode"},
+  {7, "device_not_available"},
+  {8, "Double_Fault"},
+  {9, "Coprocessor_Segment_Overrun"},
+  {10, "Invalid_TSS"},
+  {11, "Segment_Not_Present"},
+  {12, "Stack_Segment_Fault"},
+  {13, "General_Protection_Fault"},
+  {14, "page_fault"},
+  {15, "Spurious_Interrupt"},
+  {16, "x87_Floating-Point_Exception"},
+  {17, "Alignment_Check"},
+  {18, "Machine_Check"},
+  {19, "SIMD_Floating-Point_Exception"},
+  {32, "IRET_Exception"},
+
+  {-1, NULL},  // Must be last
+};
+
+// This is just the base set.
More could be added later
+// see linux-4.19.19/tools/include/uapi/asm-generic/errno-base.h
+// linux-4.19.19/include/linux/errno.h
+// linux-4.19.19/include/uapi/linux/errno.h
+// linux-4.19.19/include/uapi/asm-generic/errno.h
+
+static const NumNamePair ErrnoNames[] = {  // errno number -> symbolic name; the {-1, NULL} entry ends the list
+  {1, "EPERM"},
+  {2, "ENOENT"},
+  {3, "ESRCH"},
+  {4, "EINTR"},
+  {5, "EIO"},
+  {6, "ENXIO"},
+  {7, "E2BIG"},
+  {8, "ENOEXEC"},
+  {9, "EBADF"},
+  {10, "ECHILD"},
+  {11, "EAGAIN"},
+  {12, "ENOMEM"},
+  {13, "EACCES"},
+  {14, "EFAULT"},
+  {15, "ENOTBLK"},
+  {16, "EBUSY"},
+  {17, "EEXIST"},
+  {18, "EXDEV"},
+  {19, "ENODEV"},
+  {20, "ENOTDIR"},
+  {21, "EISDIR"},
+  {22, "EINVAL"},
+  {23, "ENFILE"},
+  {24, "EMFILE"},
+  {25, "ENOTTY"},
+  {26, "ETXTBSY"},
+  {27, "EFBIG"},
+  {28, "ENOSPC"},
+  {29, "ESPIPE"},
+  {30, "EROFS"},
+  {31, "EMLINK"},
+  {32, "EPIPE"},
+  {33, "EDOM"},
+  {34, "ERANGE"},  // NOTE(review): the Ryzen header's table continues through 40 (ELOOP); this one stops here
+
+  {-1, NULL},  // Must be last
+
+};
+#endif // __KUTRACE_CONTROL_NAMES_I3_H__
+
+
diff --git a/book-user-code/kutrace_control_names_rpi4.h b/book-user-code/kutrace_control_names_rpi4.h
new file mode 100644
index 000000000000..e2cceb6af097
--- /dev/null
+++ b/book-user-code/kutrace_control_names_rpi4.h
@@ -0,0 +1,493 @@
+// Names for syscall, etc. in KUtrace
+// Copyright 2021 Richard L. Sites
+//
+// These are from linux-5.10.46 ARM (aarch64) RPi4 64-bit. Others will vary.
+//
+
+#ifndef __KUTRACE_CONTROL_NAMES_RPI4_H__
+#define __KUTRACE_CONTROL_NAMES_RPI4_H__
+
+#include "kutrace_lib.h"
+
+static const NumNamePair PidNames[] = {
+  {0, "-idle-"},
+  {-1, NULL},  // Must be last
+};
+
+static const NumNamePair Syscall32Names[] = {
+  {511, "-sched-"},  // Fake last syscall.
Indicates where __schedule runs
+  {-1, NULL},  // Must be last
+};
+
+static const NumNamePair Syscall64Names[] = {
+// Rpi4 names from include/uapi/asm-generic/unistd.h
+// via grep NR | sed 's/^.*NR[^_]*_\([^ ]*\) *\([0-9]*\).*$/ {\2, "\1"},/'
+// Since these are just name labels, we don't bother with the NR3264 stuff
+
+  {0, "io_setup"},
+  {1, "io_destroy"},
+  {2, "io_submit"},
+  {3, "io_cancel"},
+  {4, "io_getevents"},
+  {5, "setxattr"},
+  {6, "lsetxattr"},
+  {7, "fsetxattr"},
+  {8, "getxattr"},
+  {9, "lgetxattr"},
+  {10, "fgetxattr"},
+  {11, "listxattr"},
+  {12, "llistxattr"},
+  {13, "flistxattr"},
+  {14, "removexattr"},
+  {15, "lremovexattr"},
+  {16, "fremovexattr"},
+  {17, "getcwd"},
+  {18, "lookup_dcookie"},
+  {19, "eventfd2"},
+  {20, "epoll_create1"},
+  {21, "epoll_ctl"},
+  {22, "epoll_pwait"},
+  {23, "dup"},
+  {24, "dup3"},
+  {25, "fcntl"},
+  {26, "inotify_init1"},
+  {27, "inotify_add_watch"},
+  {28, "inotify_rm_watch"},
+  {29, "ioctl"},
+  {30, "ioprio_set"},
+  {31, "ioprio_get"},
+  {32, "flock"},
+  {33, "mknodat"},
+  {34, "mkdirat"},
+  {35, "unlinkat"},
+  {36, "symlinkat"},
+  {37, "linkat"},
+  {38, "renameat"},
+  {39, "umount2"},
+  {40, "mount"},
+  {41, "pivot_root"},
+  {42, "nfsservctl"},
+  {43, "statfs"},
+  {44, "fstatfs"},
+  {45, "truncate"},
+  {46, "ftruncate"},
+  {47, "fallocate"},
+  {48, "faccessat"},
+  {49, "chdir"},
+  {50, "fchdir"},
+  {51, "chroot"},
+  {52, "fchmod"},
+  {53, "fchmodat"},
+  {54, "fchownat"},
+  {55, "fchown"},
+  {56, "openat"},
+  {57, "close"},
+  {58, "vhangup"},
+  {59, "pipe2"},
+  {60, "quotactl"},
+  {61, "getdents64"},
+  {62, "lseek"},
+  {63, "read"},
+  {64, "write"},
+  {65, "readv"},
+  {66, "writev"},
+  {67, "pread64"},
+  {68, "pwrite64"},
+  {69, "preadv"},
+  {70, "pwritev"},
+  {71, "sendfile"},
+  {72, "pselect6"},
+  {73, "ppoll"},
+  {74, "signalfd4"},
+  {75, "vmsplice"},
+  {76, "splice"},
+  {77, "tee"},
+  {78, "readlinkat"},
+  {79, "fstatat"},
+  {80, "fstat"},
+  {81, "sync"},
+  {82, "fsync"},
+  {83, "fdatasync"},
+  {84, "sync_file_range2"},  // NOTE(review): key 84 is listed twice (here and on the next line); presumably only one name applies to this kernel, and which one a lookup returns depends on the consumer -- confirm
+  {84, "sync_file_range"},
+  {85, "timerfd_create"},
+  {86, "timerfd_settime"},
+  {87, "timerfd_gettime"},
+  {88, "utimensat"},
+  {89, "acct"},
+  {90, "capget"},
+  {91, "capset"},
+  {92, "personality"},
+  {93, "exit"},
+  {94, "exit_group"},
+  {95, "waitid"},
+  {96, "set_tid_address"},
+  {97, "unshare"},
+  {98, "futex"},
+  {99, "set_robust_list"},
+  {100, "get_robust_list"},
+  {101, "nanosleep"},
+  {102, "getitimer"},
+  {103, "setitimer"},
+  {104, "kexec_load"},
+  {105, "init_module"},
+  {106, "delete_module"},
+  {107, "timer_create"},
+  {108, "timer_gettime"},
+  {109, "timer_getoverrun"},
+  {110, "timer_settime"},
+  {111, "timer_delete"},
+  {112, "clock_settime"},
+  {113, "clock_gettime"},
+  {114, "clock_getres"},
+  {115, "clock_nanosleep"},
+  {116, "syslog"},
+  {117, "ptrace"},
+  {118, "sched_setparam"},
+  {119, "sched_setscheduler"},
+  {120, "sched_getscheduler"},
+  {121, "sched_getparam"},
+  {122, "sched_setaffinity"},
+  {123, "sched_getaffinity"},
+  {124, "sched_yield"},
+  {125, "sched_get_priority_max"},
+  {126, "sched_get_priority_min"},
+  {127, "sched_rr_get_interval"},
+  {128, "restart_syscall"},
+  {129, "kill"},
+  {130, "tkill"},
+  {131, "tgkill"},
+  {132, "sigaltstack"},
+  {133, "rt_sigsuspend"},
+  {134, "rt_sigaction"},
+  {135, "rt_sigprocmask"},
+  {136, "rt_sigpending"},
+  {137, "rt_sigtimedwait"},
+  {138, "rt_sigqueueinfo"},
+  {139, "rt_sigreturn"},
+  {140, "setpriority"},
+  {141, "getpriority"},
+  {142, "reboot"},
+  {143, "setregid"},
+  {144, "setgid"},
+  {145, "setreuid"},
+  {146, "setuid"},
+  {147, "setresuid"},
+  {148, "getresuid"},
+  {149, "setresgid"},
+  {150, "getresgid"},
+  {151, "setfsuid"},
+  {152, "setfsgid"},
+  {153, "times"},
+  {154, "setpgid"},
+  {155, "getpgid"},
+  {156, "getsid"},
+  {157, "setsid"},
+  {158, "getgroups"},
+  {159, "setgroups"},
+  {160, "uname"},
+  {161, "sethostname"},
+  {162, "setdomainname"},
+  {163, "getrlimit"},
+  {164, "setrlimit"},
+  {165, "getrusage"},
+  {166, "umask"},
+  {167, "prctl"},
+  {168, "getcpu"},
+  {169, "gettimeofday"},
+  {170, "settimeofday"},
+  {171, "adjtimex"},
+  {172, "getpid"},
+  {173, "getppid"},
+  {174, "getuid"},
+  {175, "geteuid"},
+  {176, "getgid"},
+  {177, "getegid"},
+  {178, "gettid"},
+  {179, "sysinfo"},
+  {180, "mq_open"},
+  {181, "mq_unlink"},
+  {182, "mq_timedsend"},
+  {183, "mq_timedreceive"},
+  {184, "mq_notify"},
+  {185, "mq_getsetattr"},
+  {186, "msgget"},
+  {187, "msgctl"},
+  {188, "msgrcv"},
+  {189, "msgsnd"},
+  {190, "semget"},
+  {191, "semctl"},
+  {192, "semtimedop"},
+  {193, "semop"},
+  {194, "shmget"},
+  {195, "shmctl"},
+  {196, "shmat"},
+  {197, "shmdt"},
+  {198, "socket"},
+  {199, "socketpair"},
+  {200, "bind"},
+  {201, "listen"},
+  {202, "accept"},
+  {203, "connect"},
+  {204, "getsockname"},
+  {205, "getpeername"},
+  {206, "sendto"},
+  {207, "recvfrom"},
+  {208, "setsockopt"},
+  {209, "getsockopt"},
+  {210, "shutdown"},
+  {211, "sendmsg"},
+  {212, "recvmsg"},
+  {213, "readahead"},
+  {214, "brk"},
+  {215, "munmap"},
+  {216, "mremap"},
+  {217, "add_key"},
+  {218, "request_key"},
+  {219, "keyctl"},
+  {220, "clone"},
+  {221, "execve"},
+  {222, "mmap"},
+  {223, "fadvise64"},
+  {224, "swapon"},
+  {225, "swapoff"},
+  {226, "mprotect"},
+  {227, "msync"},
+  {228, "mlock"},
+  {229, "munlock"},
+  {230, "mlockall"},
+  {231, "munlockall"},
+  {232, "mincore"},
+  {233, "madvise"},
+  {234, "remap_file_pages"},
+  {235, "mbind"},
+  {236, "get_mempolicy"},
+  {237, "set_mempolicy"},
+  {238, "migrate_pages"},
+  {239, "move_pages"},
+  {240, "rt_tgsigqueueinfo"},
+  {241, "perf_event_open"},
+  {242, "accept4"},
+  {243, "recvmmsg"},
+  {244, "arch_specific_syscall"},  // NOTE(review): looks like a placeholder picked up by the sed extraction rather than a real syscall name; the table then jumps to 260
+  {260, "wait4"},
+  {261, "prlimit64"},
+  {262, "fanotify_init"},
+  {263, "fanotify_mark"},
+  {264, "name_to_handle_at"},
+  {265, "open_by_handle_at"},
+  {266, "clock_adjtime"},
+  {267, "syncfs"},
+  {268, "setns"},
+  {269, "sendmmsg"},
+  {270, "process_vm_readv"},
+  {271, "process_vm_writev"},
+  {272, "kcmp"},
+  {273, "finit_module"},
+  {274, "sched_setattr"},
+  {275, "sched_getattr"},
+  {276, "renameat2"},
+  {277, "seccomp"},
+  {278, "getrandom"},
+  {279, "memfd_create"},
+  {280, "bpf"},
+  {281, "execveat"},
+  {282, "userfaultfd"},
+  {283, "membarrier"},
+  {284, "mlock2"},
+  {285, "copy_file_range"},
+  {286, "preadv2"},
+  {287, "pwritev2"},
+  {288, "pkey_mprotect"},
+  {289, "pkey_alloc"},
+  {290, "pkey_free"},
+  {291, "statx"},
+  {292, "io_pgetevents"},
+  {293, "rseq"},
+  {294, "kexec_file_load"},
+  {403, "clock_gettime64"},
+  {404, "clock_settime64"},
+  {405, "clock_adjtime64"},
+  {406, "clock_getres_time64"},
+  {407, "clock_nanosleep_time64"},
+  {408, "timer_gettime64"},
+  {409, "timer_settime64"},
+  {410, "timerfd_gettime64"},
+  {411, "timerfd_settime64"},
+  {412, "utimensat_time64"},
+  {413, "pselect6_time64"},
+  {414, "ppoll_time64"},
+  {416, "io_pgetevents_time64"},
+  {417, "recvmmsg_time64"},
+  {418, "mq_timedsend_time64"},
+  {419, "mq_timedreceive_time64"},
+  {420, "semtimedop_time64"},
+  {421, "rt_sigtimedwait_time64"},
+  {422, "futex_time64"},
+  {423, "sched_rr_get_interval_time64"},
+  {424, "pidfd_send_signal"},
+  {425, "io_uring_setup"},
+  {426, "io_uring_enter"},
+  {427, "io_uring_register"},
+  {428, "open_tree"},
+  {429, "move_mount"},
+  {430, "fsopen"},
+  {431, "fsconfig"},
+  {432, "fsmount"},
+  {433, "fspick"},
+  {434, "pidfd_open"},
+  {435, "clone3"},
+  {436, "close_range"},
+  {437, "openat2"},
+  {438, "pidfd_getfd"},
+  {439, "faccessat2"},
+  {440, "process_madvise"},
+
+  {511, "-sched-"},  // Fake last syscall.
Indicates where __schedule runs
+  {-1, NULL},  // Must be last
+};
+
+// From cat /proc/interrupts on RPi-4B 64-bit
+static const NumNamePair IrqNames[] = {  // {number, name} pairs; the {-1, NULL} entry ends the list
+  {2, "resched_ipi"},
+  {9, "vgic"},
+  {11, "arch_timer"},
+  {12, "kvm_guest_vtimer"},
+  {18, "gpu_mbox"},  /* fe00b880.mailbox */
+  {21, "uart-pl011"},
+  {24, "dma_irq"},
+  {31, "vchiq_doorbell"},
+  {32, "eth0"},  /* mmc0_mmc1 */
+  {33, "vc4_firmware_kms"},
+  {34, "arm-pmu"},
+  {35, "arm-pmu"},
+  {36, "arm-pmu"},
+  {37, "arm-pmu"},
+  {38, "ssd"},
+  {39, "eth0?"},
+  {40, "eth00?"},
+  {46, "v3d"},
+  {48, "xhci_hcd"},
+
+// Raspberry Pi list plus some x86
+
+// {255, "spurious_apic"},
+  {255, "BH"},  /* bottom half of an interrupt handler */
+  {254, "unused_ipi"},
+  {253, "unused_ipi"},
+  {252, "wakeup_ipi"},
+  {251, "irq_work_ipi"},
+  {250, "timer_ipi"},
+  {249, "cpu_crash_stop_ipi"},
+  {248, "cpu_stop_ipi"},
+  {247, "call_func_ipi"},
+  {246, "reschedule_ipi"},
+
+  {245, "uv_bau_message"},  /* from x86 */
+
+  {244, "deferred_error"},
+  {243, "hypervisor_callback"},
+  {242, "posted_intr"},
+  {241, "posted_intr_wakeup"},
+  {240, "posted_intr_nested"},
+  {239, "managed_irq_shutdown"},
+  {238, "hyperv_reenlighten"},
+  {237, "hyperv_stimer0"},
+  {236, "local_timer_vector"},  // event 0x05ec, decimal 1516 4.19 x86
+
+  {-1, NULL},  // Must be last
+};
+
+// Export this to raw2event.cc. 0x05ec is the event number noted on the local_timer_vector entry above (236 == 0xec)
+static const int kTIMER_IRQ_EVENT = 0x05ec;
+
+// Bottom half BH vectors, from include/linux/interrupt.h
+static const NumNamePair SoftIrqNames[] = {
+  {0, "HI_SOFTIRQ"},
+  {1, "TIMER_SOFTIRQ"},
+  {2, "NET_TX_SOFTIRQ"},
+  {3, "NET_RX_SOFTIRQ"},
+  {4, "BLOCK_SOFTIRQ"},
+  {5, "IRQ_POLL_SOFTIRQ"},
+  {6, "TASKLET_SOFTIRQ"},
+  {7, "SCHED_SOFTIRQ"},
+  {8, "HRTIMER_SOFTIRQ"},
+  {9, "RCU_SOFTIRQ"},
+
+  {-1, NULL},  // Must be last
+};
+
+static const NumNamePair TrapNames[] = {  // NOTE(review): same entries as the x86 headers' TrapNames; presumably not meaningful on ARM -- confirm
+  {0, "Divide-by-zero"},
+  {1, "Debug"},
+  {2, "Non-maskable_Interrupt"},
+  {3, "Breakpoint"},
+  {4, "Overflow"},
+  {5, "Bound_Range_Exceeded"},
+  {6, "Invalid_Opcode"},
+  {7, "device_not_available"},
+  {8, "Double_Fault"},
+  {9, "Coprocessor_Segment_Overrun"},
+  {10, "Invalid_TSS"},
+  {11, "Segment_Not_Present"},
+  {12, "Stack_Segment_Fault"},
+  {13, "General_Protection_Fault"},
+  {14, "page_fault"},
+  {15, "Spurious_Interrupt"},
+  {16, "x87_Floating-Point_Exception"},
+  {17, "Alignment_Check"},
+  {18, "Machine_Check"},
+  {19, "SIMD_Floating-Point_Exception"},
+  {32, "IRET_Exception"},
+
+  {-1, NULL},  // Must be last
+};
+
+// This is just the base set. More could be added later
+// see linux-4.19.19/tools/include/uapi/asm-generic/errno-base.h
+// linux-4.19.19/include/linux/errno.h
+// linux-4.19.19/include/uapi/linux/errno.h
+// linux-4.19.19/include/uapi/asm-generic/errno.h
+
+static const NumNamePair ErrnoNames[] = {  // errno number -> symbolic name; the {-1, NULL} entry ends the list
+  {1, "EPERM"},
+  {2, "ENOENT"},
+  {3, "ESRCH"},
+  {4, "EINTR"},
+  {5, "EIO"},
+  {6, "ENXIO"},
+  {7, "E2BIG"},
+  {8, "ENOEXEC"},
+  {9, "EBADF"},
+  {10, "ECHILD"},
+  {11, "EAGAIN"},
+  {12, "ENOMEM"},
+  {13, "EACCES"},
+  {14, "EFAULT"},
+  {15, "ENOTBLK"},
+  {16, "EBUSY"},
+  {17, "EEXIST"},
+  {18, "EXDEV"},
+  {19, "ENODEV"},
+  {20, "ENOTDIR"},
+  {21, "EISDIR"},
+  {22, "EINVAL"},
+  {23, "ENFILE"},
+  {24, "EMFILE"},
+  {25, "ENOTTY"},
+  {26, "ETXTBSY"},
+  {27, "EFBIG"},
+  {28, "ENOSPC"},
+  {29, "ESPIPE"},
+  {30, "EROFS"},
+  {31, "EMLINK"},
+  {32, "EPIPE"},
+  {33, "EDOM"},
+  {34, "ERANGE"},  // NOTE(review): the Ryzen header's table continues through 40 (ELOOP); this one stops here
+
+  {-1, NULL},  // Must be last
+
+};
+#endif // __KUTRACE_CONTROL_NAMES_RPI4_H__
+
+
diff --git a/book-user-code/kutrace_control_names_ryzen.h b/book-user-code/kutrace_control_names_ryzen.h
new file mode 100644
index 000000000000..c5876de1affd
--- /dev/null
+++ b/book-user-code/kutrace_control_names_ryzen.h
@@ -0,0 +1,529 @@
+// Names for syscall, etc. in dclab_tracing
+// Copyright 2021 Richard L. Sites
+//
+// These are from linux-4.19.19 x86 AMD 64-bit. Others will vary.
+// + +#ifndef __KUTRACE_CONTROL_NAMES_RYZEN_H__ +#define __KUTRACE_CONTROL_NAMES_RYZEN_H__ + +#include "kutrace_lib.h" + +static const char* CpuFamilyModelManuf = "23 17 AMD"; + +static const NumNamePair PidNames[] = { + {0, "-idle-"}, + {-1, NULL}, // Must be last +}; + +static const NumNamePair Syscall32Names[] = { + {511, "-sched-"}, // Fake last syscall. Indicates where __schedule runs + {-1, NULL}, // Must be last +}; + +static const NumNamePair Syscall64Names[] = { + {0, "read"}, + {1, "write"}, + {2, "open"}, + {3, "close"}, + {4, "stat"}, + {5, "fstat"}, + {6, "lstat"}, + {7, "poll"}, + {8, "lseek"}, + {9, "mmap"}, + {10, "mprotect"}, + {11, "munmap"}, + {12, "brk"}, + {13, "rt_sigaction"}, + {14, "rt_sigprocmask"}, + {15, "rt_sigreturn"}, + {16, "ioctl"}, + {17, "pread64"}, + {18, "pwrite64"}, + {19, "readv"}, + {20, "writev"}, + {21, "access"}, + {22, "pipe"}, + {23, "select"}, + {24, "sched_yield"}, + {25, "mremap"}, + {26, "msync"}, + {27, "mincore"}, + {28, "madvise"}, + {29, "shmget"}, + {30, "shmat"}, + {31, "shmctl"}, + {32, "dup"}, + {33, "dup2"}, + {34, "pause"}, + {35, "nanosleep"}, + {36, "getitimer"}, + {37, "alarm"}, + {38, "setitimer"}, + {39, "getpid"}, + {40, "sendfile"}, + {41, "socket"}, + {42, "connect"}, + {43, "accept"}, + {44, "sendto"}, + {45, "recvfrom"}, + {46, "sendmsg"}, + {47, "recvmsg"}, + {48, "shutdown"}, + {49, "bind"}, + {50, "listen"}, + {51, "getsockname"}, + {52, "getpeername"}, + {53, "socketpair"}, + {54, "setsockopt"}, + {55, "getsockopt"}, + {56, "clone"}, + {57, "fork"}, + {58, "vfork"}, + {59, "execve"}, + {60, "exit"}, + {61, "wait4"}, + {62, "kill"}, + {63, "uname"}, + {64, "semget"}, + {65, "semop"}, + {66, "semctl"}, + {67, "shmdt"}, + {68, "msgget"}, + {69, "msgsnd"}, + {70, "msgrcv"}, + {71, "msgctl"}, + {72, "fcntl"}, + {73, "flock"}, + {74, "fsync"}, + {75, "fdatasync"}, + {76, "truncate"}, + {77, "ftruncate"}, + {78, "getdents"}, + {79, "getcwd"}, + {80, "chdir"}, + {81, "fchdir"}, + {82, "rename"}, + 
{83, "mkdir"}, + {84, "rmdir"}, + {85, "creat"}, + {86, "link"}, + {87, "unlink"}, + {88, "symlink"}, + {89, "readlink"}, + {90, "chmod"}, + {91, "fchmod"}, + {92, "chown"}, + {93, "fchown"}, + {94, "lchown"}, + {95, "umask"}, + {96, "gettimeofday"}, + {97, "getrlimit"}, + {98, "getrusage"}, + {99, "sysinfo"}, + {100, "times"}, + {101, "ptrace"}, + {102, "getuid"}, + {103, "syslog"}, + {104, "getgid"}, + {105, "setuid"}, + {106, "setgid"}, + {107, "geteuid"}, + {108, "getegid"}, + {109, "setpgid"}, + {110, "getppid"}, + {111, "getpgrp"}, + {112, "setsid"}, + {113, "setreuid"}, + {114, "setregid"}, + {115, "getgroups"}, + {116, "setgroups"}, + {117, "setresuid"}, + {118, "getresuid"}, + {119, "setresgid"}, + {120, "getresgid"}, + {121, "getpgid"}, + {122, "setfsuid"}, + {123, "setfsgid"}, + {124, "getsid"}, + {125, "capget"}, + {126, "capset"}, + {127, "rt_sigpending"}, + {128, "rt_sigtimedwait"}, + {129, "rt_sigqueueinfo"}, + {130, "rt_sigsuspend"}, + {131, "sigaltstack"}, + {132, "utime"}, + {133, "mknod"}, + {134, "uselib"}, + {135, "personality"}, + {136, "ustat"}, + {137, "statfs"}, + {138, "fstatfs"}, + {139, "sysfs"}, + {140, "getpriority"}, + {141, "setpriority"}, + {142, "sched_setparam"}, + {143, "sched_getparam"}, + {144, "sched_setscheduler"}, + {145, "sched_getscheduler"}, + {146, "sched_get_priority_max"}, + {147, "sched_get_priority_min"}, + {148, "sched_rr_get_interval"}, + {149, "mlock"}, + {150, "munlock"}, + {151, "mlockall"}, + {152, "munlockall"}, + {153, "vhangup"}, + {154, "modify_ldt"}, + {155, "pivot_root"}, + {156, "_sysctl"}, + {157, "prctl"}, + {158, "arch_prctl"}, + {159, "adjtimex"}, + {160, "setrlimit"}, + {161, "chroot"}, + {162, "sync"}, + {163, "acct"}, + {164, "settimeofday"}, + {165, "mount"}, + {166, "umount2"}, + {167, "swapon"}, + {168, "swapoff"}, + {169, "reboot"}, + {170, "sethostname"}, + {171, "setdomainname"}, + {172, "iopl"}, + {173, "ioperm"}, + {174, "create_module"}, + {175, "init_module"}, + {176, "delete_module"}, + 
{177, "get_kernel_syms"}, + {178, "query_module"}, + {179, "quotactl"}, + {180, "nfsservctl"}, + {181, "getpmsg"}, + {182, "putpmsg"}, + {183, "afs_syscall"}, + {184, "tuxcall"}, + {185, "security"}, + {186, "gettid"}, + {187, "readahead"}, + {188, "setxattr"}, + {189, "lsetxattr"}, + {190, "fsetxattr"}, + {191, "getxattr"}, + {192, "lgetxattr"}, + {193, "fgetxattr"}, + {194, "listxattr"}, + {195, "llistxattr"}, + {196, "flistxattr"}, + {197, "removexattr"}, + {198, "lremovexattr"}, + {199, "fremovexattr"}, + {200, "tkill"}, + {201, "time"}, + {202, "futex"}, + {203, "sched_setaffinity"}, + {204, "sched_getaffinity"}, + {205, "set_thread_area"}, + {206, "io_setup"}, + {207, "io_destroy"}, + {208, "io_getevents"}, + {209, "io_submit"}, + {210, "io_cancel"}, + {211, "get_thread_area"}, + {212, "lookup_dcookie"}, + {213, "epoll_create"}, + {214, "epoll_ctl_old"}, + {215, "epoll_wait_old"}, + {216, "remap_file_pages"}, + {217, "getdents64"}, + {218, "set_tid_address"}, + {219, "restart_syscall"}, + {220, "semtimedop"}, + {221, "fadvise64"}, + {222, "timer_create"}, + {223, "timer_settime"}, + {224, "timer_gettime"}, + {225, "timer_getoverrun"}, + {226, "timer_delete"}, + {227, "clock_settime"}, + {228, "clock_gettime"}, + {229, "clock_getres"}, + {230, "clock_nanosleep"}, + {231, "exit_group"}, + {232, "epoll_wait"}, + {233, "epoll_ctl"}, + {234, "tgkill"}, + {235, "utimes"}, + {236, "vserver"}, + {237, "mbind"}, + {238, "set_mempolicy"}, + {239, "get_mempolicy"}, + {240, "mq_open"}, + {241, "mq_unlink"}, + {242, "mq_timedsend"}, + {243, "mq_timedreceive"}, + {244, "mq_notify"}, + {245, "mq_getsetattr"}, + {246, "kexec_load"}, + {247, "waitid"}, + {248, "add_key"}, + {249, "request_key"}, + {250, "keyctl"}, + {251, "ioprio_set"}, + {252, "ioprio_get"}, + {253, "inotify_init"}, + {254, "inotify_add_watch"}, + {255, "inotify_rm_watch"}, + {256, "migrate_pages"}, + {257, "openat"}, + {258, "mkdirat"}, + {259, "mknodat"}, + {260, "fchownat"}, + {261, "futimesat"}, + {262, 
"newfstatat"}, + {263, "unlinkat"}, + {264, "renameat"}, + {265, "linkat"}, + {266, "symlinkat"}, + {267, "readlinkat"}, + {268, "fchmodat"}, + {269, "faccessat"}, + {270, "pselect6"}, + {271, "ppoll"}, + {272, "unshare"}, + {273, "set_robust_list"}, + {274, "get_robust_list"}, + {275, "splice"}, + {276, "tee"}, + {277, "sync_file_range"}, + {278, "vmsplice"}, + {279, "move_pages"}, + {280, "utimensat"}, + {281, "epoll_pwait"}, + {282, "signalfd"}, + {283, "timerfd_create"}, + {284, "eventfd"}, + {285, "fallocate"}, + {286, "timerfd_settime"}, + {287, "timerfd_gettime"}, + {288, "accept4"}, + {289, "signalfd4"}, + {290, "eventfd2"}, + {291, "epoll_create1"}, + {292, "dup3"}, + {293, "pipe2"}, + {294, "inotify_init1"}, + {295, "preadv"}, + {296, "pwritev"}, + {297, "rt_tgsigqueueinfo"}, + {298, "perf_event_open"}, + {299, "recvmmsg"}, + {300, "fanotify_init"}, + {301, "fanotify_mark"}, + {302, "prlimit64"}, + {303, "name_to_handle_at"}, + {304, "open_by_handle_at"}, + {305, "clock_adjtime"}, + {306, "syncfs"}, + {307, "sendmmsg"}, + {308, "setns"}, + {309, "getcpu"}, + {310, "process_vm_readv"}, + {311, "process_vm_writev"}, + {312, "kcmp"}, + {313, "finit_module"}, + {314, "sched_setattr"}, + {315, "sched_getattr"}, + {316, "renameat2"}, + {317, "seccomp"}, + {318, "getrandom"}, + {319, "memfd_create"}, + {320, "kexec_file_load"}, + {321, "bpf"}, + {322, "execveat"}, + {323, "userfaultfd"}, + {324, "membarrier"}, + {325, "mlock2"}, + {511, "-sched-"}, // Fake last syscall. Indicates where __schedule runs + {-1, NULL}, // Must be last +}; + +// Based on arch/x86/include/asm/x86/irq_vectors.h +// 2017: arch/x86/include/asm/irq_vectors.h +// 2019: arch/x86/include/asm/irq_vectors.h +/* + * Vectors 0 ... 31 : system traps and exceptions - hardcoded events + * Vectors 32 ... 127 : device interrupts + * Vector 128 : legacy int80 syscall interface + * Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts + * Vectors INVALIDATE_TLB_VECTOR_START ... 
255 : special interrupts + */ + +static const NumNamePair IrqNames[] = { + // 2017 machines + {0, "timer"}, // timer + {1, "i8042_keyboard1"}, // keyboard/touchpad/mouse + {8, "rtc0"}, // real-time clock chip + {9, "acpi"}, + {12, "i8042_keyboard12"}, // keyboard/touchpad/mouse + {16, "usb1"}, + {23, "usb2"}, + {24, "i915_graphics"}, // usb + {28, "enp2s0_eth0"}, // aka eth0 + {29, "hda_29_inner"}, // disk + {30, "hda_30_inner"}, // disk + {31, "mei_me"}, // Management Engine Interface + {38, "sdb"}, // disk + //{29, "snd_hda_29"}, // audio ?? maybe 32/33 now + //{30, "snd_hda_30"}, // audio ?? + + // 2017 for our particular machines dclab-1,2,3,4 + {0x22, "eth0"}, // eth0 + {0x42, "hda_29"}, // disk, forwarded to 29 + {0x52, "hda_30"}, // disk, forwarded to 30 + {0x62, "hda_31"}, // disk + {0xb1, "graphics"}, // ether?, forwards to 24, no return + + // 2018 for our particular machines dclab-1,2,3 + {0xb2, "eth0"}, // ethernet + + + {128, "int80"}, + +// 2019.03.05 Linux 4.19 Ryzen numbers seem to move around at reboot */ + //{0x21, "??"}, // 1/sec + {0x23, "eth0"}, // 18/sec + //{0x24, "??"}, // 129 as 64+64 5 sec + {0x25, "eth0?"}, // aka eth0 + {0x27, "sdb2"}, // aka disk + + // {255, "spurious_apic"}, + {255, "BH"}, // bottom half of an interrupt handler + {254, "error_apic_ipi"}, + {253, "reschedule_ipi"}, + {252, "call_func_ipi"}, + {251, "call_func1_ipi"}, + {250, "thermal_apic_ipi"}, + {249, "threshold_apic_ipi"}, + {248, "reboot_ipi"}, + {247, "x86_platform_ipi"}, + {246, "irq_work_ipi"}, + {245, "uv_bau_message"}, + {244, "deferred_error"}, + {243, "hypervisor_callback"}, + {242, "posted_intr"}, + {241, "posted_intr_wakeup"}, + {240, "posted_intr_nested"}, + {239, "managed_irq_shutdown"}, + {238, "hyperv_reenlighten"}, + {237, "hyperv_stimer0"}, + {236, "local_timer_vector"}, // event 0x05ec, decimal 1516 4.19 x86 + + {13, "fpu_irq"}, + + {-1, NULL}, // Must be last +}; + +// Export this to raw2event.cc, using above value +static const int 
// Bottom half BH vectors, from include/linux/interrupt.h
// Soft-IRQ number -> display name. The {-1, NULL} sentinel marks the end.
// NOTE(review): no use of this table is visible in this part of the file.
static const NumNamePair SoftIrqNames[] = {
  {0, "HI_SOFTIRQ"},
  {1, "TIMER_SOFTIRQ"},
  {2, "NET_TX_SOFTIRQ"},
  {3, "NET_RX_SOFTIRQ"},
  {4, "BLOCK_SOFTIRQ"},
  {5, "IRQ_POLL_SOFTIRQ"},
  {6, "TASKLET_SOFTIRQ"},
  {7, "SCHED_SOFTIRQ"},
  {8, "HRTIMER_SOFTIRQ"},
  {9, "RCU_SOFTIRQ"},

  {-1, NULL},   // Must be last
};

// Trap (exception) number -> display name.
// A table of this name is emitted into the trace by
// EmitNames(TrapNames, KUTRACE_TRAPNAME) in kutrace_lib.cc DoInit().
// Names contain no spaces (underscores are used instead); the two lower-case
// entries (7 and 14) are spelled exactly as the author wrote them.
// The {-1, NULL} sentinel ends the walk in EmitNames and must stay last.
static const NumNamePair TrapNames[] = {
  {0, "Divide-by-zero"},
  {1, "Debug"},
  {2, "Non-maskable_Interrupt"},
  {3, "Breakpoint"},
  {4, "Overflow"},
  {5, "Bound_Range_Exceeded"},
  {6, "Invalid_Opcode"},
  {7, "device_not_available"},
  {8, "Double_Fault"},
  {9, "Coprocessor_Segment_Overrun"},
  {10, "Invalid_TSS"},
  {11, "Segment_Not_Present"},
  {12, "Stack_Segment_Fault"},
  {13, "General_Protection_Fault"},
  {14, "page_fault"},
  {15, "Spurious_Interrupt"},
  {16, "x87_Floating-Point_Exception"},
  {17, "Alignment_Check"},
  {18, "Machine_Check"},
  {19, "SIMD_Floating-Point_Exception"},
  {32, "IRET_Exception"},

  {-1, NULL},   // Must be last
};
// Errno value -> symbolic name, for errno values 1..40.
// This is just the base set. More could be added later
// see linux-4.19.19/tools/include/uapi/asm-generic/errno-base.h
//     linux-4.19.19/include/linux/errno.h
//     linux-4.19.19/include/uapi/linux/errno.h
//     linux-4.19.19/include/uapi/asm-generic/errno.h
// Values are stored as positive numbers. The {-1, NULL} sentinel marks the
// end of the table and must stay last.
static const NumNamePair ErrnoNames[] = {
  {1, "EPERM"},
  {2, "ENOENT"},
  {3, "ESRCH"},
  {4, "EINTR"},
  {5, "EIO"},
  {6, "ENXIO"},
  {7, "E2BIG"},
  {8, "ENOEXEC"},
  {9, "EBADF"},
  {10, "ECHILD"},
  {11, "EAGAIN"},
  {12, "ENOMEM"},
  {13, "EACCES"},
  {14, "EFAULT"},
  {15, "ENOTBLK"},
  {16, "EBUSY"},
  {17, "EEXIST"},
  {18, "EXDEV"},
  {19, "ENODEV"},
  {20, "ENOTDIR"},
  {21, "EISDIR"},
  {22, "EINVAL"},
  {23, "ENFILE"},
  {24, "EMFILE"},
  {25, "ENOTTY"},
  {26, "ETXTBSY"},
  {27, "EFBIG"},
  {28, "ENOSPC"},
  {29, "ESPIPE"},
  {30, "EROFS"},
  {31, "EMLINK"},
  {32, "EPIPE"},
  {33, "EDOM"},
  {34, "ERANGE"},
  {35, "EDEADLK"},
  {36, "ENAMETOOLONG"},
  {37, "ENOLCK"},
  {38, "ENOSYS"},
  {39, "ENOTEMPTY"},
  {40, "ELOOP"},

  {-1, NULL},   // Must be last

};
Sites +// + +#include +#include // exit, system +#include +#include // nanosleep +#include // getpid gethostname syscall +#include // gettimeofday +#include + +#if defined(__x86_64__) +#include // _rdtsc +#endif + +#include "basetypes.h" +#include "kutrace_control_names.h" // PidNames, TrapNames, IrqNames, Syscall64Names +#include "kutrace_lib.h" + +// All the real stuff is inside this anonymous namespace +namespace { + +/* Outgoing arg to DoReset */ +#define DO_IPC 1 +#define DO_WRAP 2 + +/* For the flags byte in traceblock[1] */ +#define IPC_Flag CLU(0x80) +#define WRAP_Flag CLU(0x40) +#define Unused2_Flag CLU(0x20) +#define Unused1_Flag CLU(0x10) +#define VERSION_MASK CLU(0x0F) + + +// Module/code must be at least this version number for us to run +static const u64 kMinModuleVersionNumber = 3; + +// This defines the format of the resulting trace file +static const u64 kTracefileVersionNumber = 3; + +// Number of u64 values per trace block +static const int kTraceBufSize = 8192; + +// Number of u64 values per IPC block, one u8 per u64 in trace buf +static const int kIpcBufSize = kTraceBufSize >> 3; + +// For wraparound fixup on Raspberry Pi-4B Arm-v7 +static const int mhz_32bit_cycles = 54; + +// Globals for mapping cycles to gettimeofday +int64 start_cycles = 0; +int64 stop_cycles = 0; +int64 start_usec = 0; +int64 stop_usec = 0; + +char kernelversion[256]; +char modelname[256]; + +// Useful utility routines +int64 GetUsec() { + struct timeval tv; gettimeofday(&tv, NULL); + return (tv.tv_sec * CL(1000000)) + tv.tv_usec; +} + + + +/*x86-64 or Arm-specific timer */ +/* Arm-64 returns 32MHz counts: 31.25 ns each */ +/* Arm-32 Raspberry Pi4B 54MHz counts: 18.52 nsec */ +/* x86-64 version returns rdtsc() >> 6 to give ~20ns resolution */ +inline u64 ku_get_cycles(void) +{ + u64 timer_value; +#if defined(__aarch64__) + asm volatile("mrs %0, cntvct_el0" : "=r"(timer_value)); +#elif defined(__ARM_ARCH_ISA_ARM) + /* This 32-bit result at 54 MHz RPi4 wraps every 75 seconds 
*/ + asm volatile("mrrc p15, 1, %Q0, %R0, c14" : "=r" (timer_value)); + timer_value &= CLU(0x00000000FFFFFFFF); +#elif defined(__x86_64__) + timer_value = _rdtsc() >> 6; +#else + BUILD_BUG_ON_MSG(1, "Define the time base for your architecture"); +#endif + return timer_value; +} + + + +// Read time counter and gettimeofday() close together, returning both +void GetTimePair(int64* cycles, int64* usec) { + int64 startcy, stopcy; + int64 gtodusec, elapsedcy; + // Do more than once if we get an interrupt or other big delay in the middle of the loop + do { + startcy = ku_get_cycles(); + gtodusec = GetUsec(); + stopcy = ku_get_cycles(); + elapsedcy = stopcy - startcy; + // In a quick test on an Intel i3 chip, GetUsec() took about 150 cycles (50 nsec) + // Perhaps 4x this on Arm chips + // printf("%ld elapsed cycles\n", elapsedcy); + } while (elapsedcy > 320); // About 10 usec at 32MHz + *cycles = startcy; + *usec = gtodusec; +} + + +// For the trace_control system call, +// arg is declared to be u64. In reality, it is either a u64 or +// a pointer to a u64, depending on the command. Caller casts as +// needed, and the command implementations in kutrace_mod +// cast back as needed. + +/* VERYTEMP to see result values */ +// u32 swi_ret0, swi_ret1; + +// These numbers must exactly match the numbers in include/linux/kutrace.h +#define __NR_kutrace_control 1023 +#define KUTRACE_SCHEDSYSCALL 511 + +#if defined(__ARM_ARCH_ISA_ARM) && !defined(__aarch64__) + +#define noinline __attribute__((noinline)) +u64 noinline DoControl(u64 command, u64 arg) +{ + /* gcc -O2 removes all the crap and makes 5 instructions! 
// Sleep for msec milliseconds using nanosleep().
// The request is split into whole seconds plus the leftover milliseconds
// expressed in nanoseconds. The nanosleep() result is not examined.
void msleep(int msec) {
  const int whole_seconds = msec / 1000;
  const int leftover_msec = msec - (whole_seconds * 1000);   // == msec % 1000

  struct timespec delay;
  delay.tv_nsec = leftover_msec * 1000000;
  delay.tv_sec = whole_seconds;
  nanosleep(&delay, NULL);
}
+static const int kMaxDateTimeBuffer = 32; +static char gTempDateTimeBuffer[kMaxDateTimeBuffer]; + +// Turn seconds since the epoch into yyyymmdd_hhmmss +// Not valid after January 19, 2038 +const char* FormatSecondsDateTime(int32 sec) { + // if (sec == 0) {return "unknown";} // Longer spelling: caller expecting date + time_t tt = sec; + struct tm* t = localtime(&tt); + sprintf(gTempDateTimeBuffer, "%04d%02d%02d_%02d%02d%02d", + t->tm_year + 1900, t->tm_mon + 1, t->tm_mday, + t->tm_hour, t->tm_min, t->tm_sec); + return gTempDateTimeBuffer; +} + +// Construct a name for opening a trace file, using name of program from command line +// name: program_time_host_pid +// str should hold at least 256 bytes +const char* MakeTraceFileName(const char* argv0, char* str) { + const char* slash = strrchr(argv0, '/'); + // Point to first char of image name + if (slash == NULL) { + slash = argv0; + } else { + slash = slash + 1; // over the slash + } + + const char* timestr; + time_t tt = time(NULL); + timestr = FormatSecondsDateTime(tt); + + char hostnamestr[256]; + gethostname(hostnamestr, 256) ; + hostnamestr[255] = '\0'; + + int pid = getpid(); + + sprintf(str, "%s_%s_%s_%d.trace", slash, timestr, hostnamestr, pid); + return str; +} + +// This depends on ~KUTRACE_CMD_INSERTN working even with tracing off. 
+void InsertVariableEntry(const char* str, u64 event, u64 arg) { + u64 temp[8]; // Up to 56 bytes + u64 bytelen = strlen(str); + if (bytelen > 56) {bytelen = 56;} // If too long, truncate + u64 wordlen = 1 + ((bytelen + 7) / 8); + // Build the initial word + u64 event_with_length = event + (wordlen * 16); + // T N ARG + temp[0] = (CLU(0) << 44) | (event_with_length << 32) | arg; + memset(&temp[1], 0, 7 * sizeof(u64)); + memcpy((char*)&temp[1], str, bytelen); + DoControl(~KUTRACE_CMD_INSERTN, (u64)&temp[0]); +} + +// Add a list of names to the trace +void EmitNames(const NumNamePair* ipair, u64 event) { + u64 temp[9]; // One extra word for strcpy(56 bytes + '\0') + const NumNamePair* pair = ipair; + while (pair->name != NULL) { + InsertVariableEntry(pair->name, event, pair->number); + ++pair; + } +} + + + +// This depends on ~TRACE_INSERTN working even with tracing off. +void InsertTimePair(int64 cycles, int64 usec) { + u64 temp[8]; // Always 8 words for TRACE_INSERTN + u64 n_with_length = KUTRACE_TIMEPAIR + (3 << 4); + temp[0] = (CLU(0) << 44) | (n_with_length << 32); + temp[1] = cycles; + temp[2] = usec; + DoControl(~KUTRACE_CMD_INSERTN, (u64)&temp[0]); +} + + + +// Return false if the module is not loaded or too old. No delay. No side effect on time. +bool TestModule() { + // If module is not loaded, syscall 511 returns -1 or -ENOSYS (= -38) + // Unsigned, these are bigger than the biggest plausible version number, 255 + u64 retval = DoControl(KUTRACE_CMD_VERSION, 0); +// VERYTEMP +// fprintf(stderr, "TestModule %08lx %08lx\n", swi_ret0, swi_ret1); + + if (retval > 255) { + // Module is not loaded + fprintf(stderr, "KUtrace module/code not loaded\n"); + return false; + } + if (retval < kMinModuleVersionNumber) { + // Module is loaded but older version + fprintf(stderr, "KUtrace module/code is version %lld. 
Need at least %lld\n", + retval, kMinModuleVersionNumber); + return false; + } + //fprintf(stderr, "KUtrace module/code is version %ld.\n", retval); + return true; +} + + +// Return true if module is loaded and tracing is on, else false +// CMD_TEST returns -ENOSYS (= -38) if not a tracing kernel +// else returns 0 if tracing is off +// else returns 1 if tracing is on +bool DoTest() { + u64 retval = DoControl(KUTRACE_CMD_TEST, 0); + if ((int64)retval < 0) { + // KUtrace module/code is not available + fprintf(stderr, "KUtrace module/code not available\n"); + return false; + } + return (retval == 1); +} + +// Turn off tracing +// Complain and return false if module is not loaded +bool DoOff() { + u64 retval = DoControl(KUTRACE_CMD_OFF, 0); +//fprintf(stderr, "DoOff DoControl = %016lx\n", retval); + + msleep(20); /* Wait 20 msec for any pending tracing to finish */ + if (retval != 0) { + // Module is not loaded + fprintf(stderr, "KUtrace module/code not available\n"); + return false; + } + // Get stop time pair with tracing off + if (stop_usec == 0) {GetTimePair(&stop_cycles, &stop_usec);} +//fprintf(stdout, "DoOff GetTimePair %lx %lx\n", stop_cycles, stop_usec); + return true; +} + +// Turn on tracing +// Complain and return false if module is not loaded +bool DoOn() { +//fprintf(stderr, "DoOn\n"); + // Get start time pair with tracing off + if (start_usec == 0) {GetTimePair(&start_cycles, &start_usec);} +//fprintf(stderr, "DoOn GetTimePair %lx %lx\n", start_cycles, start_usec); + u64 retval = DoControl(KUTRACE_CMD_ON, 0); +//fprintf(stderr, "DoOn DoControl = %016lx\n", retval); + if (retval != 1) { + // Module is not loaded + fprintf(stderr, "KUtrace module/code not available\n"); + return false; + } + return true; +} + +void StripCRLF(char* s) { + int len = strlen(s); + if ((0 < len) && s[len - 1] == '\n') {s[len - 1] = '\0'; --len;} + if ((0 < len) && s[len - 1] == '\r') {s[len - 1] = '\0'; --len;} +} + + +// We want to run all this stuff at kutrace_control 
startup and/or at reset, but just once +// per execution is sufficient since these values don't change until reboot. +// Current design is two-step: +// one routine to capture all the info +// second routine to insert into trace buffer +// We want the inserts to be fast and have no delays that might allow a process +// migration in the *middle* of building the initial name list. Doing so will +// confuse wraparound and may leave some events unnamed for the first few +// thousand rawtoevent entries if some CPU blocks precede the remainder of +// the name entries. + +// Linux kernel version +// CPU model name +// Hostname +// Network link speed +// Interrupt number to name mapping +// + +// TODO: Get interrupt pairs from /proc/interrupts +// TODO: Get hostname from system call gethostname +// TODO: Get Ethernet link speed from /sys/class/net/*/speed + +/*** +// Get next interrupt description line from xxx, if any, and return +// interrupt number and name and return value of true. +// If no more lines, return false +// +// Expecting: +// cat /proc/interrupts +// CPU0 CPU1 +// 0: 20 0 IO-APIC 2-edge timer +// 1: 3 0 IO-APIC 1-edge i8042 +// 8: 1 0 IO-APIC 8-edge rtc0 + +bool NextIntr(FILE* procintrfile, int* intrnum, char* intrname) { + char buffer[kMaxBufferSize]; + while (ReadLine(procintrfile, buffer, kMaxBufferSize)) { + int n = sscanf(buffer, "%d:", intrnum); + if (n != 1) {continue;} // No intr on this line + const char* space = strrchr(buffer, ' '); + if (space == NULL) {continue;} // No name on this line + if (space[1] == '\0') {continue;} // Empty name on this line + strncpy(intrname, space + 1, kMaxBufferSize); + return true; + } + + return false; +} + +// Little code to read /proc/interrupts and capture names for each interrupt number +// dsites 2017.12.15 +// +// use InsertVariableEntry now 2020.11.12 +// +void EmitLocalIrqNames(u64 n) { + FILE* procintrfile = fopen("/proc/interrupts", "r"); + if (procintrfile == NULL) {return;} + + u64 temp[9]; // One extra 
word for strcpy(56B + \n) + int intrnum; + char intrname[kMaxBufferSize]; + while (NextIntr(procintrfile, &intrnum, intrname)) { + u64 bytelen = strlen(intrname); + if (bytelen > 55) {continue;} // Error if too long. Drop + u64 wordlen = 1 + ((bytelen + 7) / 8); + // Build the initial word + u64 n_with_length = n + (wordlen * 16); + // T N ARG + temp[0] = (0ll << 44) | (n_with_length << 32) | (intrnum); + memset(&temp[1], 0, 8 * sizeof(u64)); + strcpy((char*)&temp[1], intrname); + DoControl(~TRACE_INSERTN, (u64)&temp[0]); + } + fclose(procintrfile); +} +***/ + + +// Kernel version is the result of command: uname -rv +void GetKernelVersion(char* kernelversion, int len) { + kernelversion[0] = '\0'; + FILE *fp = popen("uname -rv", "r"); + if (fp == NULL) {return;} + char* s = fgets(kernelversion, len, fp); + pclose(fp); + if (s != NULL) { + StripCRLF(kernelversion); + } +} + +// Model number is in /proc/cpuinfo +void GetModelName(char* modelname, int len) { + modelname[0] = '\0'; + FILE *cpuinfo = fopen("/proc/cpuinfo", "rb"); + if (cpuinfo == NULL) {return;} + char *arg = NULL; + size_t size = 0; + while(getline(&arg, &size, cpuinfo) != -1) + { + // Expecting something like + // model name : ARMv7 Processor rev 3 (v7l) + if(memcmp(arg, "model name", 10) == 0) { + const char* colon = strchr(arg, ':'); + if (colon != NULL) { // Skip the colon and the next space + StripCRLF(arg); + strncpy(modelname, colon + 2, len); + modelname[len - 1] = '\0'; + break; // Just the first one, then get out + } + } + } + free(arg); + fclose(cpuinfo); +} + +// Inserts result of uname -rv +void InsertKernelVersion(const char* kernelversion) { + InsertVariableEntry(kernelversion, KUTRACE_KERNEL_VER, 0); +} + +void InsertModelName(const char* modelname) { + InsertVariableEntry(modelname, KUTRACE_MODEL_NAME, 0); +} + +// Initialize trace buffer with syscall/irq/trap names +// and processor model name, uname -rv +// Module must be loaded. 
// Initialize trace buffer with syscall/irq/trap names
// and processor model name, uname -rv
// Module must be loaded. Tracing must be off
//
// process_name: name recorded for the calling process's pid
//
// Order matters here: the slow string captures are done before the start
// time pair is read, and the time pair is inserted before anything else.
// Returns without doing anything if TestModule() reports no usable module.
void DoInit(const char* process_name) {
//fprintf(stderr, "DoInit\n");
  if (!TestModule()) {return;}		// No module loaded

  // AHHA. These take more than 10 msec to execute, so the 20-bit time can wrap.
  // We need to capture the strings up front before creating the first trace entry,
  // then insert later.
  GetKernelVersion(kernelversion, 256);	// Slow. Must do first
  GetModelName(modelname, 256);		// Slow. Must do first
  GetTimePair(&start_cycles, &start_usec);	// Now OK to look at time

//fprintf(stderr, "DoInit GetTimePair %lx %lx\n", start_cycles, start_usec);
  // Insert the timepair as a trace entry.
  // This is a hedge against separate programs starting (wraparound) tracing
  // and stopping tracing. If this happens, start_usec will be zero in the
  // stopping program, and DoDump() recovers the pair from the first trace block.

  // DEPRECATED
  // We want this to be the very first trace entry so we can find it easily
  InsertTimePair(start_cycles, start_usec);

  // A little trace environment information
  InsertKernelVersion(kernelversion);
  InsertModelName(modelname);

  // Put trap/irq/syscall names into front of trace
  EmitNames(PidNames, KUTRACE_PIDNAME);
  EmitNames(TrapNames, KUTRACE_TRAPNAME);
  EmitNames(IrqNames, KUTRACE_INTERRUPTNAME);
  EmitNames(Syscall64Names, KUTRACE_SYSCALL64NAME);
  EmitNames(Syscall32Names, KUTRACE_SYSCALL32NAME);

  // Put current pid name into front of real part of trace
  // Only the low 16 bits of the pid are kept
  int pid = getpid() & 0x0000ffff;
  InsertVariableEntry(process_name, KUTRACE_PIDNAME, pid);

  // And then establish that pid on this CPU
  //         T             N                       ARG
  u64 temp = (CLU(0) << 44) | ((u64)KUTRACE_USERPID << 32) | (pid);
  DoControl(~KUTRACE_CMD_INSERT1, temp);
}
Tracing must be off +void DoFlush() { +//fprintf(stderr, "DoFlush\n"); + if (!TestModule()) {return;} // No module loaded + DoControl(KUTRACE_CMD_FLUSH, 0); +//fprintf(stderr, "DoFlush DoControl returned\n"); +} + +// Set up for a new tracing run +// Module must be loaded. Tracing must be off +void DoReset(u64 control_flags) { + if (!TestModule()) {return;} // No module loaded + DoControl(KUTRACE_CMD_RESET, control_flags); + + start_usec = 0; + stop_usec = 0; + start_cycles = 0; + stop_cycles = 0; +} + +// Show some sort of tracing status +// Module must be loaded. Tracing may well be on +// If IPC,only 7/8 of the blocks are counted: +// for every 64KB traceblock there is another 8KB IPCblock (and some wasted space) +void DoStat(u64 control_flags) { + u64 retval = DoControl(KUTRACE_CMD_STAT, 0); + double blocksize = kTraceBufSize * sizeof(u64); + if ((control_flags & DO_IPC) != 0) {blocksize = (blocksize * 8) / 7;} + fprintf(stderr, "Stat: %lld trace blocks used (%3.1fMB)\n", + retval, (retval * blocksize) / (1024 * 1024)); +} + +// Called with the very first trace block, moduleversion >= 3 +// This block has 12 words on the front, then a 3-word TimePairNum trace entry +void ExtractTimePair(u64* traceblock, int64* fallback_cycles, int64* fallback_usec) { + u64 entry0 = traceblock[12]; + u64 entry0_event = (entry0 >> 32) & 0xFFF; + if ((entry0_event & 0xF0F) != KUTRACE_TIMEPAIR) { // take out length nibble + fprintf(stderr, "ExtractTimePair missing event\n"); + *fallback_cycles = 0; + *fallback_usec = 0; + return; + } + *fallback_cycles = traceblock[13]; + *fallback_usec = traceblock[14]; +} + +// F(cycles) gives usec = base_usec + (cycles - base_cycles) * m; +typedef struct { + u64 base_cycles; + u64 base_usec; + double m_slope; +} CyclesToUsecParams; + +void SetParams(int64 start_cycles, int64 start_usec, + int64 stop_cycles, int64 stop_usec, CyclesToUsecParams* param) { + param->base_cycles = start_cycles; + param->base_usec = start_usec; + if (stop_cycles <= 
start_cycles) {stop_cycles = start_cycles + 1;} // avoid zdiv + param->m_slope = (stop_usec - start_usec) * 1.0 / (stop_cycles - start_cycles); +} + +int64 CyclesToUsec(int64 cycles, const CyclesToUsecParams& param) { + int64 delta_usec = (cycles - param.base_cycles) * param.m_slope; + return param.base_usec + delta_usec; +} + + + +// Dump the trace buffer to filename +// Module must be loaded. Tracing must be off +void DoDump(const char* fname) { + // if (!TestModule()) {return;} // No module loaded + DoControl(KUTRACE_CMD_FLUSH, 0); + + // Start timepair is set by DoInit + // Stop timepair is set by DoOff + // If start_cycles is zero, we got here directly without calling DoInit, + // which was done in some earlier run of this program. In that case, go + // find the start pair as the first real trace entry in the first trace block. + CyclesToUsecParams params; + + FILE* f = fopen(fname, "wb"); + if (f == NULL) { + fprintf(stderr, "%s did not open\n", fname); + return; + } + + u64 traceblock[kTraceBufSize]; + u64 ipcblock[kIpcBufSize]; + // Get number of trace blocks + // If tracing wraped around, the count is complemented + bool did_wrap_around = false; + u64 wordcount = DoControl(KUTRACE_CMD_GETCOUNT, 0); + if ((s64)wordcount < 0) { + wordcount = ~wordcount; + did_wrap_around = true; + } + u64 blockcount = wordcount >> 13; +//fprintf(stderr, "wordcount = %ld\n", wordcount); +//fprintf(stderr, "blockcount = %ld\n", blockcount); + + // Loop on trace blocks + for (int i = 0; i < blockcount; ++i) { + u64 k = i * kTraceBufSize; // Trace Word number to fetch next + u64 k2 = i * kIpcBufSize; // IPC Word number to fetch next + + // Extract 64KB trace block + for (int j = 0; j < kTraceBufSize; ++j) { + traceblock[j] = DoControl(KUTRACE_CMD_GETWORD, k++); + } + + // traceblock[0] has cpu number and cycle counter + // traceblock[1] has flags in top byte, then zeros + // We put the reconstructed getimeofday value into traceblock[1] + uint8 flags = traceblock[1] >> 56; + bool 
this_block_has_ipc = ((flags & IPC_Flag) != 0); + + // For very first block, insert value of m as a double, for dump program to use + // and clear traceblock[3], reserved for future use + bool very_first_block = (i == 0); + if (very_first_block) { + // Fill in the tracefile version + traceblock[1] |= ((kTracefileVersionNumber & VERSION_MASK) << 56); + if (!did_wrap_around) { + // The kernel exports the wrap flag in the first block before + // it is known whether the trace actually wrapped. + // It did not, so turn off that bit + traceblock[1] &= ~(WRAP_Flag << 56); + } + // Extract the fallback start timepair + int64 fallback_usec, fallback_cycles; + ExtractTimePair(traceblock, &fallback_cycles, &fallback_usec); + if (start_usec == 0) { + start_usec = fallback_usec; + start_cycles = fallback_cycles; + } + + // For Arm-32, the "cycle" counter is only 32 bits at 54 MHz, so wraps about every 79 seconds. + // This can leave stop_cycles small by a few multiples of 4G. We do a temporary fix here + // for exactly 54 MHz. Later, we could find or take as input a different approximate + // frequency. We could also do something similar for a 40-bit counter. 
+ bool has_32bit_cycles = ((start_cycles | stop_cycles) & 0xffffffff00000000llu) == 0; + if (has_32bit_cycles) { + uint64 elapsed_usec = (uint64)(stop_usec - start_usec); + uint64 elapsed_cycles = (uint64)(stop_cycles - start_cycles); + uint64 expected_cycles = elapsed_usec * mhz_32bit_cycles; + // Pick off the expected high bits + uint64 approx_hi = (start_cycles + expected_cycles) & 0xffffffff00000000llu; + // Put them in + stop_cycles |= (int64)approx_hi; + // Cross-check and change by 1 if right at a boundary + // and off by more than 12.5% from expected MHz + elapsed_cycles = (uint64)(stop_cycles - start_cycles); + uint64 ratio = elapsed_cycles / elapsed_usec; + if (ratio > (mhz_32bit_cycles + (mhz_32bit_cycles >> 3))) {stop_cycles -= 0x0000000100000000llu;} + if (ratio < (mhz_32bit_cycles - (mhz_32bit_cycles >> 3))) {stop_cycles += 0x0000000100000000llu;} + elapsed_cycles = (uint64)(stop_cycles - start_cycles); + } + + uint64 block_0_cycle = traceblock[0] & CLU(0x00ffffffffffffff); + + // Get ready to reconstruct gettimeofday values for each traceblock + SetParams(start_cycles, start_usec, stop_cycles, stop_usec, ¶ms); + + // Fill in the start/stop timepairs we are using, so + // downstream programs can also SetParams + traceblock[2] = start_cycles; + traceblock[3] = start_usec; + traceblock[4] = stop_cycles; + traceblock[5] = stop_usec; + } + + // Reconstruct the gettimeofday value for this block + int64 block_cycles = traceblock[0] & CLU(0x00ffffffffffffff); + int64 block_usec = CyclesToUsec(block_cycles, params); + traceblock[1] |= (block_usec & CLU(0x00ffffffffffffff)); + fwrite(traceblock, 1, sizeof(traceblock), f); + + // For each 64KB traceblock that has IPC_Flag set, also read the IPC bytes + if (this_block_has_ipc) { + // Extract 8KB IPC block + for (int j = 0; j < kIpcBufSize; ++j) { + ipcblock[j] = DoControl(KUTRACE_CMD_GETIPCWORD, k2++); + } + fwrite(ipcblock, 1, sizeof(ipcblock), f); + } + } + fclose(f); + + fprintf(stdout, " %s written 
(%3.1fMB)\n", fname, blockcount / 16.0); + + // Go ahead and set up for another trace + DoControl(KUTRACE_CMD_RESET, 0); +} + + + +// Exit this program +// Tracing must be off +void DoQuit() { + DoOff(); + exit(0); +} + +// Add a name of type n, value number, to the trace +void addname(uint64 eventnum, uint64 number, const char* name) { + u64 temp[8]; // Buffer for name entry + u64 bytelen = strlen(name); + if (bytelen > 55) {bytelen = 55;} + u64 wordlen = 1 + ((bytelen + 7) / 8); + // Build the initial word + u64 n_with_length = eventnum + (wordlen * 16); + // T N ARG + temp[0] = (CLU(0) << 44) | (n_with_length << 32) | (number); + memset((char*)&temp[1], 0, 7 * sizeof(u64)); + memcpy((char*)&temp[1], name, bytelen); + kutrace::DoControl(KUTRACE_CMD_INSERTN, (u64)&temp[0]); +} + +// Create a Mark entry +void DoMark(u64 n, u64 arg) { + // T N ARG + u64 temp = (CLU(0) << 44) | (n << 32) | (arg & CLU(0x00000000FFFFFFFF)); + DoControl(KUTRACE_CMD_INSERT1, temp); +} + +// Create an arbitrary entry, returning 1 if tracing is on, <=0 otherwise +u64 DoEvent(u64 eventnum, u64 arg) { + // T N ARG + u64 temp = ((eventnum & CLU(0xFFF)) << 32) | (arg & CLU(0x00000000FFFFFFFF)); + return DoControl(KUTRACE_CMD_INSERT1, temp); +} + +// Uppercase are mapped to lowercase +// All unexpected characters are mapped to '.' +// - = 0x2D . = 0x2E / = 0x2F +// Base40 characters are _abcdefghijklmnopqrstuvwxyz0123456789-./ +// 0 1 2 3 +// 0123456789012345678901234567890123456789 +// where the first is NUL. 
+static const char kToBase40[256] = { + 0,38,38,38, 38,38,38,38, 38,38,38,38, 38,38,38,38, + 38,38,38,38, 38,38,38,38, 38,38,38,38, 38,38,38,38, + 38,38,38,38, 38,38,38,38, 38,38,38,38, 38,37,38,39, + 27,28,29,30, 31,32,33,34, 35,36,38,38, 38,38,38,38, + + 38, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11, 12,13,14,15, + 16,17,18,19, 20,21,22,23, 24,25,26,38, 38,38,38,38, + 38, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11, 12,13,14,15, + 16,17,18,19, 20,21,22,23, 24,25,26,38, 38,38,38,38, + + 38,38,38,38, 38,38,38,38, 38,38,38,38, 38,38,38,38, + 38,38,38,38, 38,38,38,38, 38,38,38,38, 38,38,38,38, + 38,38,38,38, 38,38,38,38, 38,38,38,38, 38,38,38,38, + 38,38,38,38, 38,38,38,38, 38,38,38,38, 38,38,38,38, + + 38,38,38,38, 38,38,38,38, 38,38,38,38, 38,38,38,38, + 38,38,38,38, 38,38,38,38, 38,38,38,38, 38,38,38,38, + 38,38,38,38, 38,38,38,38, 38,38,38,38, 38,38,38,38, + 38,38,38,38, 38,38,38,38, 38,38,38,38, 38,38,38,38, +}; + +static const char kFromBase40[40] = { + '\0','a','b','c', 'd','e','f','g', 'h','i','j','k', 'l','m','n','o', + 'p','q','r','s', 't','u','v','w', 'x','y','z','0', '1','2','3','4', + '5','6','7','8', '9','-','.','/', +}; + + +// Unpack six characters from 32 bits. +// str must be 8 bytes. We somewhat-arbitrarily capitalize the first letter +char* Base40ToChar(u64 base40, char* str) { + base40 &= CLU(0x00000000ffffffff); // Just low 32 bits + memset(str, 0, 8); + bool first_letter = true; + // First character went in last, comes out first + int i = 0; + while (base40 > 0) { + u64 n40 = base40 % 40; + str[i] = kFromBase40[n40]; + base40 /= 40; + if (first_letter && (1 <= n40) && (n40 <= 26)) { + str[i] &= ~0x20; // Uppercase it + first_letter = false; + } + ++i; + } + return str; +} + +// Pack six characters into 32 bits. 
Only use a-zA-Z0-9.-/ +u64 CharToBase40(const char* str) { + int len = strlen(str); + // If longer than 6 characters, take only the first 6 + if (len > 6) {len = 6;} + u64 base40 = 0; + // First character goes in last, comes out first + for (int i = len - 1; i >= 0; -- i) { + base40 = (base40 * 40) + kToBase40[str[i]]; + } + return base40; +} + +} // End anonymous namespace + +bool kutrace::test() {return ::TestModule();} +void kutrace::go(const char* process_name) {::DoReset(0); ::DoInit(process_name); ::DoOn();} +void kutrace::goipc(const char* process_name) {::DoReset(1); ::DoInit(process_name); ::DoOn();} +void kutrace::stop(const char* fname) {::DoOff(); ::DoFlush(); ::DoDump(fname); ::DoQuit();} +void kutrace::mark_a(const char* label) {::DoMark(KUTRACE_MARKA, ::CharToBase40(label));} +void kutrace::mark_b(const char* label) {::DoMark(KUTRACE_MARKB, ::CharToBase40(label));} +void kutrace::mark_c(const char* label) {::DoMark(KUTRACE_MARKC, ::CharToBase40(label));} +void kutrace::mark_d(uint64 n) {::DoMark(KUTRACE_MARKD, n);} + +// Returns number of words inserted 1..8, or +// 0 if tracing is off, negative if module is not not loaded +u64 kutrace::addevent(uint64 eventnum, uint64 arg) {return ::DoEvent(eventnum, arg);} + +void kutrace::addname(uint64 eventnum, uint64 number, const char* name) {::addname(eventnum, number, name);} + +void kutrace::msleep(int msec) {::msleep(msec);} +int64 kutrace::readtime() {return ::ku_get_cycles();} + +// Go ahead and expose all the routines +const char* kutrace::Base40ToChar(u64 base40, char* str) {return ::Base40ToChar(base40, str);} +u64 kutrace::CharToBase40(const char* str) {return ::CharToBase40(str);} + +u64 kutrace::DoControl(u64 command, u64 arg) { + return ::DoControl(command, arg); +} +void kutrace::DoDump(const char* fname) {::DoDump(fname);} +u64 kutrace::DoEvent(u64 eventnum, u64 arg) {return ::DoEvent(eventnum, arg);} +void kutrace::DoFlush() {::DoFlush();} +void kutrace::DoInit(const char* process_name) 
{::DoInit(process_name);} +void kutrace::DoMark(u64 n, u64 arg) {::DoMark(n, arg);} +bool kutrace::DoTest() {return ::DoTest();} +bool kutrace::DoOff() {return ::DoOff();} +bool kutrace::DoOn() {return ::DoOn();} +void kutrace::DoQuit() {::DoQuit();} +void kutrace::DoReset(u64 doing_ipc){::DoReset(doing_ipc);} +void kutrace::DoStat(u64 control_flags) {::DoStat(control_flags);} +void kutrace::EmitNames(const NumNamePair* ipair, u64 n) {::EmitNames(ipair, n);} +u64 kutrace::GetUsec() {return ::GetUsec();} +const char* kutrace::MakeTraceFileName(const char* name, char* str) { + return ::MakeTraceFileName(name, str); +} +bool kutrace::TestModule() {return ::TestModule();} + + + + + diff --git a/book-user-code/kutrace_lib.h b/book-user-code/kutrace_lib.h new file mode 100644 index 000000000000..f96c9a557bb5 --- /dev/null +++ b/book-user-code/kutrace_lib.h @@ -0,0 +1,282 @@ +// kutrace_lib.h +// Copyright 2021 Richard L. Sites +// +// This is a simple interface for user-mode code to control kernel/user tracing and add markers +// + +#ifndef __KUTRACE_LIB_H__ +#define __KUTRACE_LIB_H__ + +#include "basetypes.h" + +typedef uint32 u32; +typedef uint64 u64; +typedef int64 s64; + + +typedef struct { + int number; + const char* name; +} NumNamePair; + + +/* This is the definitive list of raw trace 12-bit event numbers */ +// These user-mode declarations need to exactly match +// include/linux/kutrace.h kernel-mode ones + +/* kutrace_control() commands */ +#define KUTRACE_CMD_OFF 0 +#define KUTRACE_CMD_ON 1 +#define KUTRACE_CMD_FLUSH 2 +#define KUTRACE_CMD_RESET 3 +#define KUTRACE_CMD_STAT 4 +#define KUTRACE_CMD_GETCOUNT 5 +#define KUTRACE_CMD_GETWORD 6 +#define KUTRACE_CMD_INSERT1 7 +#define KUTRACE_CMD_INSERTN 8 +#define KUTRACE_CMD_GETIPCWORD 9 +#define KUTRACE_CMD_TEST 10 +#define KUTRACE_CMD_VERSION 11 + + + +// All events are single uint64 entries unless otherwise specified +// +-------------------+-----------+---------------+-------+-------+ +// | timestamp | event | 
delta | retval| arg0 | +// +-------------------+-----------+---------------+-------+-------+ +// 20 12 8 8 16 + +// Add KUTRACE_ and uppercase +#define KUTRACE_NOP 0x000 +#define KUTRACE_RDTSC 0x001 // unused +#define KUTRACE_GETTOD 0x002 // unused + +#define KUTRACE_VARLENLO 0x010 +#define KUTRACE_VARLENHI 0x1FF + +// Variable-length starting numbers. Only events 010-1FF are variable length +// Middle hex digit of event number is 2..8, giving total length of entry including first uint64 +// The arg is the lock# or PID# etc. that this name belongs to. +// +-------------------+-----------+-------------------------------+ +// | timestamp | event | arg | +// +-------------------+-----------+-------------------------------+ +// | character name, 1-56 bytes, NUL padded | +// +- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -+ +// ~ ~ +// +---------------------------------------------------------------+ +// 20 12 32 + +// TimePair (DEFUNCT) +// +-------------------+-----------+-------------------------------+ +// | timestamp | event | arg | +// +-------------------+-----------+-------------------------------+ +// | cycle counter value | +// +---------------------------------------------------------------+ +// | matching gettimeofday value | +// +---------------------------------------------------------------+ +// 20 12 32 + + +// Variable-length starting numbers. 
+// Middle hex digit will become length in u64 words, 2..8 +#define KUTRACE_FILENAME 0x001 +#define KUTRACE_PIDNAME 0x002 +#define KUTRACE_METHODNAME 0x003 +#define KUTRACE_TRAPNAME 0x004 +#define KUTRACE_INTERRUPTNAME 0x005 +#define KUTRACE_TIMEPAIR 0x006 /* DEPRECATED */ +#define KUTRACE_LOCKNAME 0x007 /* added 2019.10.25 */ +#define KUTRACE_SYSCALL64NAME 0x008 +#define KUTRACE_SYSCALL32NAME 0x00C +#define KUTRACE_PACKETNAME 0x100 +#define KUTRACE_PC_TEMP 0x101 /* scaffolding 2020.01.29 now PC_U nd PC_K */ +#define KUTRACE_KERNEL_VER 0x102 /* Kernel version, uname -rv */ +#define KUTRACE_MODEL_NAME 0x103 /* CPU model name, /proc/cpuinfo */ +#define KUTRACE_HOST_NAME 0x104 /* CPU host name */ +#define KUTRACE_QUEUE_NAME 0x105 /* Queue name */ +#define KUTRACE_RES_NAME 0x106 /* Arbitrary resource name */ + +// Specials are point events. Hex 200-220 currently. PC sample is outside this range +#define KUTRACE_USERPID 0x200 /* Context switch */ +#define KUTRACE_RPCIDREQ 0x201 /* CPU is processing RPC# n request */ +#define KUTRACE_RPCIDRESP 0x202 /* CPU is processing RPC# n response */ +#define KUTRACE_RPCIDMID 0x203 /* CPU is processing RPC# n middle */ +#define KUTRACE_RPCIDRXMSG 0x204 /* For display: RPC message received, approx packet time */ +#define KUTRACE_RPCIDTXMSG 0x205 /* For display: RPC message sent, approx packet time */ +#define KUTRACE_RUNNABLE 0x206 /* Make runnable */ +#define KUTRACE_IPI 0x207 /* Send IPI */ +#define KUTRACE_MWAIT 0x208 /* C-states: how deep to sleep */ +#define KUTRACE_PSTATE 0x209 /* P-states: cpu freq sample in MHz increments */ + + +// MARK_A,B,C arg is six base-40 chars NUL, A-Z, 0-9, . 
- / +// MARK_D arg is unsigned int +// +-------------------+-----------+-------------------------------+ +// | timestamp | event | arg | +// +-------------------+-----------+-------------------------------+ +// 20 12 32 + +#define KUTRACE_MARKA 0x20A +#define KUTRACE_MARKB 0x20B +#define KUTRACE_MARKC 0x20C +#define KUTRACE_MARKD 0x20D +#define KUTRACE_LEFTMARK 0x20E // Inserted by eventtospan +#define KUTRACE_RIGHTMARK 0x20F // Inserted by eventtospan +#define KUTRACE_LOCKNOACQUIRE 0x210 +#define KUTRACE_LOCKACQUIRE 0x211 +#define KUTRACE_LOCKWAKEUP 0x212 + +// Added 2020.10.29 +#define KUTRACE_RX_PKT 0x214 /* Raw packet received w/32-byte payload hash */ +#define KUTRACE_TX_PKT 0x215 /* Raw packet sent w/32-byte payload hash */ + +#define KUTRACE_RX_USER 0x216 /* Request beginning at user code w/32-byte payload hash */ +#define KUTRACE_TX_USER 0x217 /* Response ending at user code w/32-byte payload hash */ + +#define KUTRACE_MBIT_SEC 0x218 /* Network rate in Mb/s */ + +#define KUTRACE_RESOURCE 0x219 /* Arbitrary resource span; arg says which resource */ +#define KUTRACE_ENQUEUE 0x21A /* Put RPC on a work queue; arg says which queue */ +#define KUTRACE_DEQUEUE 0x21B /* Remove RPC from a queue; arg says which queue */ +#define KUTRACE_PSTATE2 0x21C /* P-states: cpu freq change, new in MHz increments */ + + +#define KUTRACE_MAX_SPECIAL 0x27F // Last special, range 200..27F + +// Extra events have duration, but are otherwise similar to specials +// PC sample. 
// Names for the variable-length events 0y0-0yF and 1y0-1yF, where y is length in words 2..8.
// Entries [0..15] name events 0y0-0yF and entries [16..31] name events 1y0-1yF.
// NOTE(review): entries 6 and 7 read "trap"/"irq" although 0x006 and 0x007 are
// KUTRACE_TIMEPAIR and KUTRACE_LOCKNAME, and 0x105/0x106 have no name yet -- confirm intent.
static const char* const kNameName[32] = {
  "-000-",      // 0x000
  "file",       // 0x001 KUTRACE_FILENAME
  "pid",        // 0x002 KUTRACE_PIDNAME
  "rpc",        // 0x003 KUTRACE_METHODNAME
  "trap",       // 0x004 KUTRACE_TRAPNAME
  "irq",        // 0x005 KUTRACE_INTERRUPTNAME
  "trap",       // 0x006
  "irq",        // 0x007
  "syscall",    // 0x008 KUTRACE_SYSCALL64NAME
  "syscall",    // 0x009
  "syscall",    // 0x00A
  "syscall",    // 0x00B
  "syscall32",  // 0x00C KUTRACE_SYSCALL32NAME
  "syscall32",  // 0x00D
  "syscall32",  // 0x00E
  "syscall32",  // 0x00F

  "packet",     // 0x100 KUTRACE_PACKETNAME
  "pctmp",      // 0x101 KUTRACE_PC_TEMP
  "kernv",      // 0x102 KUTRACE_KERNEL_VER
  "cpum",       // 0x103 KUTRACE_MODEL_NAME
  "host",       // 0x104 KUTRACE_HOST_NAME
  "",           // 0x105 KUTRACE_QUEUE_NAME
  "",           // 0x106 KUTRACE_RES_NAME
  "",           // 0x107
  "",           // 0x108
  "",           // 0x109
  "",           // 0x10A
  "",           // 0x10B
  "",           // 0x10C
  "",           // 0x10D
  "",           // 0x10E
  "",           // 0x10F
};

// Names for the special events 200-21F, subscripted by the low five bits of the event number
static const char* const kSpecialName[32] = {
  "userpid",    // 0x200 KUTRACE_USERPID
  "rpcreq",     // 0x201 KUTRACE_RPCIDREQ
  "rpcresp",    // 0x202 KUTRACE_RPCIDRESP
  "rpcmid",     // 0x203 KUTRACE_RPCIDMID
  "rxmsg",      // 0x204 KUTRACE_RPCIDRXMSG
  "txmsg",      // 0x205 KUTRACE_RPCIDTXMSG
  "runnable",   // 0x206 KUTRACE_RUNNABLE
  "sendipi",    // 0x207 KUTRACE_IPI
  "mwait",      // 0x208 KUTRACE_MWAIT
  "freq",       // 0x209 KUTRACE_PSTATE
  "mark_a",     // 0x20A KUTRACE_MARKA
  "mark_b",     // 0x20B KUTRACE_MARKB
  "mark_c",     // 0x20C KUTRACE_MARKC
  "mark_d",     // 0x20D KUTRACE_MARKD
  "-20e-",      // 0x20E KUTRACE_LEFTMARK
  "-20f-",      // 0x20F KUTRACE_RIGHTMARK
  "try_",       // 0x210 KUTRACE_LOCKNOACQUIRE
  "acq_",       // 0x211 KUTRACE_LOCKACQUIRE
  "rel_",       // 0x212 KUTRACE_LOCKWAKEUP
  "-213-",      // 0x213
  "rx",         // 0x214 KUTRACE_RX_PKT
  "tx",         // 0x215 KUTRACE_TX_PKT
  "urx",        // 0x216 KUTRACE_RX_USER
  "utx",        // 0x217 KUTRACE_TX_USER
  "mbs",        // 0x218 KUTRACE_MBIT_SEC
  "res",        // 0x219 KUTRACE_RESOURCE
  "enq",        // 0x21A KUTRACE_ENQUEUE
  "deq",        // 0x21B KUTRACE_DEQUEUE
  "-21c-",      // 0x21C KUTRACE_PSTATE2
  "-21d-",      // 0x21D
  "-21e-",      // 0x21E
  "-21f-",      // 0x21F
};
// x86- and ARM-specific names for return codes -128 to -1.
// For a return code r in [-128..-1] the name is kErrnoName[-r - 1], so
// -1 (EPERM) is entry [0], not entry [1]. Codes without a name here are "".
// See include/uapi/asm-generic/errno-base.h
// See include/uapi/asm-generic/errno.h
// ...more could be added
static const char* const kErrnoName[128] = {
  // errno 1..8
  "EPERM", "ENOENT", "ESRCH", "EINTR", "EIO", "ENXIO", "E2BIG", "ENOEXEC",
  // errno 9..16
  "EBADF", "ECHILD", "EAGAIN", "ENOMEM", "EACCES", "EFAULT", "ENOTBLK", "EBUSY",
  // errno 17..24
  "EEXIST", "EXDEV", "ENODEV", "ENOTDIR", "EISDIR", "EINVAL", "ENFILE", "EMFILE",
  // errno 25..32
  "ENOTTY", "ETXTBSY", "EFBIG", "ENOSPC", "ESPIPE", "EROFS", "EMLINK", "EPIPE",

  // errno 33..40
  "EDOM", "ERANGE", "EDEADLK", "ENAMETOOLONG", "ENOLCK", "ENOSYS", "ENOTEMPTY", "ELOOP",
  // errno 41..48
  "", "ENOMSG", "EIDRM", "ECHRNG", "EL2NSYNC", "EL3HLT", "EL3RST", "ELNRNG",
  // errno 49..56
  "EUNATCH", "ENOCSI", "EL2HLT", "EBADE", "EBADR", "EXFULL", "ENOANO", "EBADRQC",
  // errno 57..64
  "EBADSLT", "", "EBFONT", "ENOSTR", "ENODATA", "ETIME", "ENOSR", "ENONET",

  // errno 65..96, not named yet
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",

  // errno 97..128, not named yet
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
  "", "", "", "", "", "", "", "",
};
DoDump(const char* fname); + u64 DoEvent(u64 eventnum, u64 arg); + void DoFlush(); + void DoInit(const char* process_name); + void DoMark(u64 n, u64 arg); + bool DoTest(); + bool DoOff(); + bool DoOn(); + void DoQuit(); + void DoReset(u64 doing_ipc); + void DoStat(u64 control_flags); + void EmitNames(const NumNamePair* ipair, u64 n); + u64 GetUsec(); + const char* MakeTraceFileName(const char* name, char* str); + bool TestModule(); +} + +#endif // __KUTRACE_LIB_H__ + + diff --git a/book-user-code/makeself.cc b/book-user-code/makeself.cc new file mode 100644 index 000000000000..b607ea9a85fa --- /dev/null +++ b/book-user-code/makeself.cc @@ -0,0 +1,189 @@ +// Little program to make a self-contained HTML file for displaying dclab graphs. +// Copyright 2021 Richard L. Sites +// +// Inputs +// (1) A base HTML file with everything except for a library and json data +// This file contains three stylized comments that indicate where to +// include the other pieces, specified as arg[1] +// (2) The d3.v4.min.js JavaScript library, fetched from the same directory as this program +// (3) A JSON file of data to graph, specified as arg[2] +// +// Output +// A new self-contained HTML file written to arg[3] +// + +#include +#include +#include // exit +#include + +static const char* const_text_1 = ""; + +static const char* const_text_3 = "var myString = '"; +static const char* const_text_4 = "';"; + +//static const char* const_text_5 = "data = JSON.parse(myString); newdata2_resize(data);"; +// Now uses onload="initAll()" +static const char* const_text_5 = ""; +static const char* const_text_6 = ""; + + +void usage() { + fprintf(stderr, "Usage: makeself \n"); + exit(0); +} + +int main (int argc, const char** argv) { + if (argc < 2) {usage();} + + FILE* finlib = fopen("d3.v4.min.js", "rb"); + if (finlib == NULL) {fprintf(stderr, "%s did not open.\n", "d3.v4.min.js");} + + FILE* finhtml = fopen(argv[1], "rb"); + if (finhtml == NULL) {fprintf(stderr, "%s did not open.\n", argv[1]);} + + 
FILE* finjson = NULL; + FILE* fouthtml = NULL; + if (argc >= 4) { + finjson = fopen(argv[2], "rb"); + if (finjson == NULL) {fprintf(stderr, "%s did not open.\n", argv[2]);} + + fouthtml = fopen(argv[3], "wb"); + if (fouthtml == NULL) {fprintf(stderr, "%s did not open.\n", argv[3]);} + } else if (argc == 3) { + // Pipe from stdin + finjson = stdin; + + fouthtml = fopen(argv[2], "wb"); + if (fouthtml == NULL) {fprintf(stderr, "%s did not open.\n", argv[2]);} + } else { + // Pipe from stdin and to stdout + finjson = stdin; + fouthtml = stdout; + } + + if (finhtml == NULL || finjson == NULL || finlib == NULL || fouthtml == NULL) { + exit(0); + } + + char* inlib_buf = new char[ 1000000]; + char* inhtml_buf = new char[ 1000000]; + char* injson_buf = new char[250000000]; // 250MB + + int lib_len = fread(inlib_buf, 1, 1000000, finlib); + fclose(finlib); + + int html_len = fread(inhtml_buf, 1, 1000000, finhtml); + fclose(finhtml); + + int json_len = fread(injson_buf, 1, 250000000, finjson); + if (finjson != stdin) {fclose(finjson);} + + char* self0 = strstr(inhtml_buf, ""); + char* self1 = strstr(inhtml_buf, ""); + char* self2 = strstr(inhtml_buf, ""); + + if (self0 == NULL || self1 == NULL || self2 == NULL) { + fprintf(stderr, "%s does not contain selfcontained* comments\n", argv[1]); + exit(0); + } + + char* self0_end = strchr(self0, '\n'); + if (self0_end == NULL) {fprintf(stderr, "Missing after selfcontained0\n");} + ++self0_end; // over the + + char* self0_cr2 = strchr(self0_end, '\n'); + if (self0_cr2 == NULL) {fprintf(stderr, "Missing second after selfcontained0\n");} + ++self0_cr2; // over the + + char* self1_end = strchr(self1 + 1, '\n'); + if (self1_end == NULL) {fprintf(stderr, "Missing after selfcontained1\n");} + ++self1_end; // over the + + char* self2_end = strchr(self2 + 1, '\n'); + if (self2_end == NULL) {fprintf(stderr, "Missing after selfcontained2\n");} + ++self2_end; // over the + + + // Output is + // inhtml_buf up to self0 (len1), + // minus the next 
line (the include for d3.v4.min.js) + // plus constant text + // plus inlib_buf + // plus constant text + // + // plus inhtml_buf between self0 and self1 (len2) + // plus constant text + // plus injson_buf with all turned into space + // plus constant text + // + // plus inhtml_buf between self1 and self2 (len3) + // plus constant text to display json + // plus constant text + // + // plus inhtml_buf after self2 (len4) + + const char* prior_line = &injson_buf[0]; + int linenum = 1; + bool check_sorted = true; + for (int i = 0; i < json_len; ++i) { + if (injson_buf[i] == '\n') { + ++linenum; + const char* next_line = &injson_buf[i + 1]; + // Check for sorted + if (i < json_len - 5) { + if (check_sorted && (strncmp(prior_line, next_line, 4) > 0)) { + fprintf(stderr, "Input not sorted at line %d\n", linenum); + char temp[64]; + strncpy(temp, next_line, 64); + temp[63] = '\0'; + fprintf(stderr, " '%s...'\n", temp); + exit(0); + } + // Stop checking sorted at first line that has "[999.0," in column 1 + if (strncmp(next_line, "[999", 4) == 0) {check_sorted = false;} + // Stop checking sorted if line has " \"unsorted\"" in column 1 + if ((i < json_len - 11) && (strncmp(next_line, " \"unsorted\"", 11) == 0)) {check_sorted = false;} + // Stop checking sorted if line has " \"presorted\"" in column 1 + if ((i < json_len - 12) && (strncmp(next_line, " \"presorted\"", 12) == 0)) {check_sorted = false;} + } + + prior_line = next_line; + // Replace newline with space -- JSON string may not contain newline + injson_buf[i] = ' '; + // Replace backslash with two of them + // Replace quote with backslash quote + } + } + + // Lengths of four inhtml pieces + int len1 = self0_end - inhtml_buf; + int len2 = self1_end - self0_cr2; // Skips one line of d3.v4.min.js include + int len3 = self2_end - self1_end; + int len4 = (inhtml_buf + html_len) - self2_end; + + fwrite(inhtml_buf, 1, len1, fouthtml); + fwrite(const_text_1, 1, strlen(const_text_1), fouthtml); + fwrite(inlib_buf, 1, lib_len, 
fouthtml); + fwrite(const_text_2, 1, strlen(const_text_2), fouthtml); + + fwrite(self0_cr2, 1, len2, fouthtml); + + fwrite(const_text_3, 1, strlen(const_text_3), fouthtml); + fwrite(injson_buf, 1, json_len, fouthtml); + fwrite(const_text_4, 1, strlen(const_text_4), fouthtml); + + fwrite(self1_end, 1, len3, fouthtml); + fwrite(const_text_5, 1, strlen(const_text_5), fouthtml); + fwrite(const_text_6, 1, strlen(const_text_6), fouthtml); + + fwrite(self2_end, 1, len4, fouthtml); + if (fouthtml != stdout) {fclose(fouthtml);} + + free(inlib_buf); + free(inhtml_buf); + free(injson_buf); + return 0; +} + diff --git a/book-user-code/matrix.cc b/book-user-code/matrix.cc new file mode 100644 index 000000000000..5fa5ea49c755 --- /dev/null +++ b/book-user-code/matrix.cc @@ -0,0 +1,971 @@ +// Matrix multiply experiments, looking at cache blocking +// Copyright 2021 Richard L. Sites + +#include +#include +#include // gettimeofday +#include "basetypes.h" +#include "kutrace_lib.h" +#include "timecounters.h" + +// compile with g++ -O2 matrix.cc kutrace_lib.cc -o matrix_ku + + +// +// Sample timings, with cache simulation and miss counts +// +// SimpleMultiply 87.137 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 1077341184 1058414205 886641817 +// +// remapsize = 8 +// BlockMultiplyRemap 38.692 seconds, sum=2494884076.030973911 +// Misses L1/L2/L3 35197695 17894256 15430436 +// +// remapsize = 16 +// BlockMultiplyRemap 37.028 seconds, sum=2494884076.030989647 +// Misses L1/L2/L3 19917433 10416455 8750623 +// +// remapsize = 32 +// BlockMultiplyRemap 37.840 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 26161141 8116254 5228737 +// +// remapsize = 64 +// BlockMultiplyRemap 46.340 seconds, sum=2494884076.030960083 +// Misses L1/L2/L3 111357948 6005061 3741151 +// +// SimpleMultiplyTranspose 57.594 seconds, sum=2494884076.030954838 +// Misses L1/L2/L3 268100748 147229568 141132672 +// +// +// Without cache simulation +// SimpleMultiply 6.478 seconds, 
sum=2494884076.030955315 +// Misses L1/L2/L3 0 0 0 +// remapsize = 8 +// BlockMultiplyRemap 0.598 seconds, sum=2494884076.030973911 +// Misses L1/L2/L3 0 0 0 +// remapsize = 16 +// BlockMultiplyRemap 0.419 seconds, sum=2494884076.030989647 +// Misses L1/L2/L3 0 0 0 +// remapsize = 32 +// BlockMultiplyRemap 0.368 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 0 0 0 +// remapsize = 64 +// BlockMultiplyRemap 0.356 seconds, sum=2494884076.030960083 +// Misses L1/L2/L3 0 0 0 +// +// SimpleMultiplyTranspose 1.139 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 0 0 0 +// +// Without low bits L3 set selection +// SimpleMultiply 87.900 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 1077341184 1058414205 886641817 +// SimpleMultiplyColumnwise 80.283 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 1209008128 1209008128 1092145348 +// +// With XOR in L3 set selection +// SimpleMultiply 90.130 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 1077341184 1058414205 746808422 +// SimpleMultiplyColumnwise 88.247 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 1209008128 1209008128 185272771 +// --------- net 20% faster than rowwise +// 00 +// SimpleMultiply 6.482 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 0 0 0 +// SimpleMultiplyColumnwise 5.115 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 0 0 0 +// SimpleMultiplyTranspose 0.584 seconds, sum=2494884076.030954838 +// Misses L1/L2/L3 0 0 0 +// BlockMultiplyRemap 0.602 seconds, sum=2494884076.030973911 +// Misses L1/L2/L3 0 0 0 +// +// 01 +// SimpleMultiply 6.458 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 0 0 0 +// SimpleMultiplyColumnwise 5.211 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 0 0 0 +// SimpleMultiplyTranspose 0.594 seconds, sum=2494884076.030954838 +// Misses L1/L2/L3 0 0 0 +// BlockMultiplyRemap 0.630 seconds, sum=2494884076.030973911 +// Misses L1/L2/L3 0 0 0 +// +// 10 +// Remap Misses L1/L2/L3 524288 523928 522560 +// Transpose Misses 
L1/L2/L3 2359296 2359258 2342724 +// SimpleMultiply 87.108 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 1077341184 1058414205 886641817 +// SimpleMultiplyColumnwise 79.103 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 1209008128 1209008128 1092145348 +// SimpleMultiplyTranspose 57.489 seconds, sum=2494884076.030954838 +// Misses L1/L2/L3 268100748 147229568 141132672 +// BlockMultiplyRemap 38.472 seconds, sum=2494884076.030973911 +// Misses L1/L2/L3 35197695 17894256 15430436 +// +// 11 +// Remap Misses L1/L2/L3 524288 523928 518315 +// Transpose Misses L1/L2/L3 2359296 2359258 943198 +// SimpleMultiply 89.591 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 1077341184 1058414205 751193415 +// SimpleMultiplyColumnwise 87.377 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 1209008128 1209008128 184542843 +// SimpleMultiplyTranspose 64.925 seconds, sum=2494884076.030954838 +// Misses L1/L2/L3 268100748 147229568 132833587 +// BlockMultiplyRemap 42.061 seconds, sum=2494884076.030973911 +// Misses L1/L2/L3 35197695 17894256 16794674 +// +// 00 w/slow transpose +// SimpleMultiply 6.422 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 0 0 0 +// SimpleMultiplyColumnwise 5.162 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 0 0 0 +// SimpleMultiplyTranspose 1.138 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 0 0 0 +// SimpleMultiplyTransposeFast 0.586 seconds, sum=2494884076.030954838 +// Misses L1/L2/L3 0 0 0 +// BlockMultiplyRemap 0.613 seconds, sum=2494884076.030973911 +// Misses L1/L2/L3 0 0 0 +// +// 01 w/slow transpose +// SimpleMultiply 6.365 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 0 0 0 +// SimpleMultiplyColumnwise 5.044 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 0 0 0 +// SimpleMultiplyTranspose 1.144 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 0 0 0 +// SimpleMultiplyTransposeFast 0.579 seconds, sum=2494884076.030954838 +// Misses L1/L2/L3 0 0 0 +// BlockMultiplyRemap 0.601 seconds, 
sum=2494884076.030973911 +// Misses L1/L2/L3 0 0 0 +// +// 10 w/slow transpose +// Transpose Misses L1/L2/L3 2359296 2359258 2342724 +// BlockTranspose Misses L1/L2/L3 552960 524395 522240 +// +// SimpleMultiply 87.978 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 1077341184 1058414205 886641817 +// SimpleMultiplyColumnwise 79.212 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 1209008128 1209008128 1092145348 +// SimpleMultiplyTranspose 43.685 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 269018803 148146944 142050176 +// SimpleMultiplyTransposeFast 58.290 seconds, sum=2494884076.030954838 +// Misses L1/L2/L3 268100748 147229568 141132672 +// BlockMultiplyRemap 39.473 seconds, sum=2494884076.030973911 +// Misses L1/L2/L3 35197695 17894256 15430436 +// +// 11 w/slow transpose +// Transpose Misses L1/L2/L3 2359296 2359258 1019221 +// BlockTranspose Misses L1/L2/L3 552960 524395 522427 +// +// SimpleMultiply 90.130 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 1077341184 1058414205 752975084 +// SimpleMultiplyColumnwise 89.653 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 1209008128 1209008128 183260146 +// SimpleMultiplyTranspose 44.817 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 269018803 148146944 133124904 +// SimpleMultiplyTransposeFast 61.296 seconds, sum=2494884076.030954838 +// Misses L1/L2/L3 268100748 147229568 132811796 +// BlockMultiplyRemap 38.003 seconds, sum=2494884076.030973911 +// Misses L1/L2/L3 35197695 17894256 16808337 +// +// 32x32 remap +// BlockMultiplyRemap 0.373 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 26161141 8116254 5228737 +// +// BlockMultiplyRemap 0.392 seconds, sum=2494884076.030955315 +// Misses L1/L2/L3 26161141 8116254 5243627 +// +// Remap Misses L1/L2/L3 524288 524288 523376 +// Remap Misses L1/L2/L3 524288 524288 517579 +// + +#define TRACK_CACHES 0 +#define HASHED_L3 0 + +static const int kRowsize = 1024; +static const int kColsize = kRowsize; +static const int 
kBlocksize = 8; + +static const int kRemapsize = 32; +//static const int kRemapsize = 16; +//static const int kRemapsize = 32; +//static const int kRemapsize = 64; + +////typedef unsigned long int uint64; +typedef void MulProc(const double* a, const double* b, double* c); + +static double* aa = NULL; +static double* bb = NULL; +static double* cc = NULL; + +static const int kL1LgSize = 15; +static const int kL1LgAssoc = 3; +static const int kL1LgLinesize = 6; +static const int kL1LgSetsize = kL1LgSize - kL1LgAssoc - kL1LgLinesize; +static const int kL1Setsize = 1 << kL1LgSetsize; +static const int kL1Assoc = 1 << kL1LgAssoc; +static const int kL1Assocmask = kL1Assoc - 1; +static const uint64 kL1Setmask = (1l << kL1LgSetsize) - 1; +static const uint64 kL1Tagmask = (1l << kL1LgLinesize) - 1; + +static const int kL2LgSize = 18; +static const int kL2LgAssoc = 3; +static const int kL2LgLinesize = 6; +static const int kL2LgSetsize = kL2LgSize - kL2LgAssoc - kL2LgLinesize; +static const int kL2Setsize = 1 << kL2LgSetsize; +static const int kL2Assoc = 1 << kL2LgAssoc; +static const int kL2Assocmask = kL2Assoc - 1; +static const uint64 kL2Setmask = (1l << kL2LgSetsize) - 1; +static const uint64 kL2Tagmask = (1l << kL2LgLinesize) - 1; + + +static const int kL3LgSize = 21; +static const int kL3LgAssoc = 4; +static const int kL3LgLinesize = 6; +static const int kL3LgSetsize = kL3LgSize - kL3LgAssoc - kL3LgLinesize; +static const int kL3Setsize = 1 << kL3LgSetsize; +static const int kL3Assoc = 1 << kL3LgAssoc; +static const int kL3Assocmask = kL3Assoc - 1; +static const uint64 kL3Setmask = (1l << kL3LgSetsize) - 1; +static const uint64 kL3Tagmask = (1l << kL3LgLinesize) - 1; + +static uint64 L1misses = 0; +static uint64 L2misses = 0; +static uint64 L3misses = 0; +static int L1rr = 0; +static int L2rr = 0; +static int L3rr = 0; + +static uint64 L1tag[kL1Setsize * kL1Assoc]; +static uint64 L2tag[kL2Setsize * kL2Assoc]; +static uint64 L3tag[kL3Setsize * kL3Assoc]; + + +void 
InitTags() { + memset(L1tag, 0, kL1Setsize * kL1Assoc * sizeof(uint64)); + memset(L2tag, 0, kL2Setsize * kL2Assoc * sizeof(uint64)); + memset(L3tag, 0, kL3Setsize * kL3Assoc * sizeof(uint64)); + L1misses = L2misses = L3misses = 0; +} + +#if TRACK_CACHES +bool L1(uint64 addr) { + int set = ((addr >> kL1LgLinesize) & kL1Setmask) << kL1LgAssoc; + uint64 tag = addr & ~kL1Tagmask; + for (int i = 0; i < kL1Assoc; ++i) { + if (L1tag[set + i] == tag) {return true;} + } + ++L1misses; + L1tag[set + L1rr] = tag; + L1rr = (L1rr + 1) & kL1Assocmask; + return false; +} + +bool L2(uint64 addr) { + int set = ((addr >> kL2LgLinesize) & kL2Setmask) << kL2LgAssoc; + uint64 tag = addr & ~kL2Tagmask; + for (int i = 0; i < kL2Assoc; ++i) { + if (L2tag[set + i] == tag) {return true;} + } + ++L2misses; + L2tag[set + L2rr] = tag; + L2rr = (L2rr + 1) & kL2Assocmask; + return false; +} + +bool L3(uint64 addr) { +#if HASHED_L3 + int set = (((addr >> kL3LgLinesize) ^ (addr >> kL3LgSize)) & kL3Setmask) << kL3LgAssoc; +#else + int set = ((addr >> kL3LgLinesize) & kL3Setmask) << kL3LgAssoc; +#endif + uint64 tag = addr & ~kL3Tagmask; + for (int i = 0; i < kL3Assoc; ++i) { + if (L3tag[set + i] == tag) {return true;} + } + ++L3misses; + L3tag[set + L3rr] = tag; + L3rr = (L3rr + 1) & kL3Assocmask; + return false; +} + +void L123(uint64 addr) { + L1(addr); + L2(addr); + L3(addr); +} + +#else + +bool L1(uint64 addr) {return false;} +bool L2(uint64 addr) {return false;} +bool L3(uint64 addr) {return false;} +void L123(uint64 addr) {} +#endif + + + +// Give simple values near 1.0 to each element of arr +void SimpleInit(double* arr) { + for (int row = 0; row < kRowsize; ++row) { + for (int col = 0; col < kColsize; ++col) { + arr[row * kRowsize + col] = 1.0 + (row * kRowsize + col) / 1000000.0; + } + } +} + +// Zero arr +void ZeroInit(double* arr) { + for (int row = 0; row < kRowsize; ++row) { + for (int col = 0; col < kColsize; ++col) { + arr[row * kRowsize + col] = 0.0; + } + } +} + +// Sum all the 
elements of arr -- used for simple sameness check +double SimpleSum(double* arr) { + double sum = 0.0; + for (int row = 0; row < kRowsize; ++row) { + for (int col = 0; col < kColsize; ++col) { + sum += arr[row * kRowsize + col]; + } + } + return sum; +} + +// Test two arrays for equality +bool EqualArray(const double* arr1, const double* arr2) { + for (int k = 0; k < kRowsize * kColsize; ++k) { + if (arr1[k] != arr2[k]) {return false;} + } + return true; +} + +void TimeMe(const char* label, MulProc f, const double* a, const double* b, double* c) { + InitTags(); + int64 start_usec = GetUsec(); + f(a, b, c); + int64 stop_usec = GetUsec(); + double duration_usec = stop_usec - start_usec; + fprintf(stdout, "%s\t%5.3f seconds, sum=%18.9f\n", label, duration_usec/1000000.0, SimpleSum(c)); + fprintf(stdout, "Misses L1/L2/L3 %10lld %10lld %10lld\n", L1misses, L2misses, L3misses); +} + + + +inline +double VectorSum1(const double* aptr, const double* bptr, int count, int rowsize) { + const double* aptr2 = aptr; + const double* bptr2 = bptr; + double sum0 = 0.0; + for (int k = 0; k < count; ++k) { + sum0 += aptr2[0] * bptr2[0 * rowsize]; +L1((uint64)&aptr2[0]); +L2((uint64)&aptr2[0]); +L3((uint64)&aptr2[0]); +L1((uint64)&bptr2[0 * rowsize]); +L2((uint64)&bptr2[0 * rowsize]); +L3((uint64)&bptr2[0 * rowsize]); + aptr2 += 1; + bptr2 += 1 * rowsize; + } + return (sum0); +} + +inline +double VectorSum2(const double* aptr, const double* bptr, int count, int rowsize) { + const double* aptr2 = aptr; + const double* bptr2 = bptr; + double sum0 = 0.0; + double sum1 = 0.0; + for (int k = 0; k < count; k += 2) { + sum0 += aptr2[0] * bptr2[0 * rowsize]; + sum1 += aptr2[1] * bptr2[1 * rowsize]; +L1((uint64)&aptr2[0]); +L2((uint64)&aptr2[0]); +L3((uint64)&aptr2[0]); +L1((uint64)&bptr2[0 * rowsize]); +L2((uint64)&bptr2[0 * rowsize]); +L3((uint64)&bptr2[0 * rowsize]); +L1((uint64)&aptr2[1]); +L2((uint64)&aptr2[1]); +L3((uint64)&aptr2[1]); +L1((uint64)&bptr2[1 * rowsize]); +L2((uint64)&bptr2[1 
* rowsize]); +L3((uint64)&bptr2[1 * rowsize]); + aptr2 += 2; + bptr2 += 2 * rowsize; + } + return (sum0 + sum1); +} + +inline +double VectorSum4(const double* aptr, const double* bptr, int count, int rowsize) { + const double* aptr2 = aptr; + const double* bptr2 = bptr; + double sum0 = 0.0; + double sum1 = 0.0; + double sum2 = 0.0; + double sum3 = 0.0; + for (int k = 0; k < count; k += 4) { + sum0 += aptr2[0] * bptr2[0 * rowsize]; + sum1 += aptr2[1] * bptr2[1 * rowsize]; + sum2 += aptr2[2] * bptr2[2 * rowsize]; + sum3 += aptr2[3] * bptr2[3 * rowsize]; +L1((uint64)&aptr2[0]); +L2((uint64)&aptr2[0]); +L3((uint64)&aptr2[0]); +L1((uint64)&bptr2[0 * rowsize]); +L2((uint64)&bptr2[0 * rowsize]); +L3((uint64)&bptr2[0 * rowsize]); +L1((uint64)&aptr2[1]); +L2((uint64)&aptr2[1]); +L3((uint64)&aptr2[1]); +L1((uint64)&bptr2[1 * rowsize]); +L2((uint64)&bptr2[1 * rowsize]); +L3((uint64)&bptr2[1 * rowsize]); +L1((uint64)&aptr2[2]); +L2((uint64)&aptr2[2]); +L3((uint64)&aptr2[2]); +L1((uint64)&bptr2[2 * rowsize]); +L2((uint64)&bptr2[2 * rowsize]); +L3((uint64)&bptr2[2 * rowsize]); +L1((uint64)&aptr2[3]); +L2((uint64)&aptr2[3]); +L3((uint64)&aptr2[3]); +L1((uint64)&bptr2[3 * rowsize]); +L2((uint64)&bptr2[3 * rowsize]); +L3((uint64)&bptr2[3 * rowsize]); + aptr2 += 4; + bptr2 += 4 * rowsize; + } + return (sum0 + sum1 + sum2 + sum3); +} + +// +//============================================================================== +// + +void SimpleMultiply(const double* a, const double* b, double* c) { + for (int row = 0; row < kRowsize; ++row) { + for (int col = 0; col < kColsize; ++col) { +bool traceme = (col<2) & (row < 2); +traceme = false; +if (traceme) {fprintf(stdout, "[%d,%d] = ", row, col);} + double sum = 0.0; + for (int k = 0; k < kRowsize; ++k) { + sum += a[row * kRowsize + k] * b[k * kRowsize + col]; +int hit1a = L1((uint64)&a[row * kRowsize + k]); +int hit2a = L2((uint64)&a[row * kRowsize + k]); +int hit3a = L3((uint64)&a[row * kRowsize + k]); +int hit1b = L1((uint64)&b[k * 
kRowsize + col]); +int hit2b = L2((uint64)&b[k * kRowsize + col]); +int hit3b = L3((uint64)&b[k * kRowsize + col]); +if (traceme) {fprintf(stdout, "%016llx %016llx a%d%d%d b%d%d%d ", +(uint64)&a[row * kRowsize + k], (uint64)&b[k * kRowsize + col], +hit1a, hit2a, hit3a, hit1b, hit2b, hit3b);} + } + c[row * kRowsize + col] = sum; +int hit1c = L1((uint64)&c[row * kRowsize + col]); +int hit2c = L2((uint64)&c[row * kRowsize + col]); +int hit3c = L3((uint64)&c[row * kRowsize + col]); +if (traceme) {fprintf(stdout, "c%d%d%d\n", hit1c, hit2c, hit3c);} +//if ((row < 16) && (col < 16)) { +//fprintf(stdout, "[%d,%d] Misses L1/L2/L3 %10lld %10lld %10lld\n", row, col, L1misses, L2misses, L3misses); +//} + } + } +} + +void SimpleMultiplyColumnwise(const double* a, const double* b, double* c) { + for (int col = 0; col < kColsize; ++col) { + for (int row = 0; row < kRowsize; ++row) { +bool traceme = (col<2) & (row < 2); +traceme = false; +if (traceme) {fprintf(stdout, "[%d,%d] = ", row, col);} + double sum = 0.0; + for (int k = 0; k < kRowsize; ++k) { + sum += a[row * kRowsize + k] * b[k * kRowsize + col]; +int hit1a = L1((uint64)&a[row * kRowsize + k]); +int hit2a = L2((uint64)&a[row * kRowsize + k]); +int hit3a = L3((uint64)&a[row * kRowsize + k]); +int hit1b = L1((uint64)&b[k * kRowsize + col]); +int hit2b = L2((uint64)&b[k * kRowsize + col]); +int hit3b = L3((uint64)&b[k * kRowsize + col]); +if (traceme) {fprintf(stdout, "%016llx %016llx a%d%d%d b%d%d%d ", +(uint64)&a[row * kRowsize + k], (uint64)&b[k * kRowsize + col], +hit1a, hit2a, hit3a, hit1b, hit2b, hit3b);} + } + c[row * kRowsize + col] = sum; +int hit1c = L1((uint64)&c[row * kRowsize + col]); +int hit2c = L2((uint64)&c[row * kRowsize + col]); +int hit3c = L3((uint64)&c[row * kRowsize + col]); +if (traceme) {fprintf(stdout, "c%d%d%d\n", hit1c, hit2c, hit3c);} +//if ((row < 16) && (col < 16)) { +//fprintf(stdout, "[%d,%d] Misses L1/L2/L3 %10lld %10lld %10lld\n", row, col, L1misses, L2misses, L3misses); +//} + } + } +} + 
+// Just access 1 row and column, to time 1B pure multiplies. unroll to avoid dependant adds +void SimpleMultiplyOne(const double* a, const double* b, double* c) { + for (int row = 0; row < kRowsize; ++row) { + for (int col = 0; col < kColsize; ++col) { + double sum0 = 0.0; + double sum1 = 0.0; + double sum2 = 0.0; + double sum3 = 0.0; + //for (int k = 0; k < kRowsize; ++k) { + // sum += a[(row * kRowsize + k) & 1] * b[(k * kRowsize + col) & 1]; + //} + //c[(row * kRowsize + col) & 1] = sum; + for (int k = 0; k < kRowsize; k += 4) { + sum0 += a[0] * b[0]; + sum1 += a[1] * b[1]; + sum2 += a[2] * b[2]; + sum3 += a[3] * b[3]; + } + c[1] = sum0 + sum1 + sum2 + sum3; + } + } +} + + +void SimpleMultiplyUnrolled4(const double* a, const double* b, double* c) { + for (int row = 0; row < kRowsize; ++row) { + for (int col = 0; col < kColsize; ++col) { + c[row * kRowsize + col] = VectorSum4(&a[row * kRowsize + 0], + &b[0 * kRowsize + col], + kRowsize, kRowsize); +L1((uint64)&c[row * kRowsize + col]); +L2((uint64)&c[row * kRowsize + col]); +L3((uint64)&c[row * kRowsize + col]); + } + } +} + +void SimpleMultiplyUnrolled2(const double* a, const double* b, double* c) { + for (int row = 0; row < kRowsize; ++row) { + for (int col = 0; col < kColsize; ++col) { + c[row * kRowsize + col] = VectorSum2(&a[row * kRowsize + 0], + &b[0 * kRowsize + col], + kRowsize, kRowsize); +L1((uint64)&c[row * kRowsize + col]); +L2((uint64)&c[row * kRowsize + col]); +L3((uint64)&c[row * kRowsize + col]); + } + } +} + +void SimpleMultiplyUnrolled1(const double* a, const double* b, double* c) { + for (int row = 0; row < kRowsize; ++row) { + for (int col = 0; col < kColsize; ++col) { + c[row * kRowsize + col] = VectorSum1(&a[row * kRowsize + 0], + &b[0 * kRowsize + col], + kRowsize, kRowsize); +L1((uint64)&c[row * kRowsize + col]); +L2((uint64)&c[row * kRowsize + col]); +L3((uint64)&c[row * kRowsize + col]); + } + } +} + +void PointerMultiplyUnrolled4(const double* a, const double* b, double* c) { + const 
double* aptr = &a[0]; + const double* bptr = &b[0]; + for (int row = 0; row < kRowsize; ++row) { + for (int col = 0; col < kColsize; ++col) { + c[row * kRowsize + col] = VectorSum4(&a[row * kRowsize + 0], + &b[0 * kRowsize + col], + kRowsize, kRowsize); +L1((uint64)&c[row * kRowsize + col]); +L2((uint64)&c[row * kRowsize + col]); +L3((uint64)&c[row * kRowsize + col]); + } + } +} + +// Depends on c being zero'd on entry +void BlockMultiply(const double* a, const double* b, double* c) { + for (int row = 0; row < kRowsize; row += kBlocksize) { + for (int col = 0; col < kColsize; col += kBlocksize) { + // Calculate an 8x8 subarray of c + for (int subcol = 0; subcol < kBlocksize; ++subcol) { + for (int subrow = 0; subrow < kBlocksize; ++subrow) { + c[(row + subrow) * kRowsize + (col + subcol)] += + VectorSum1(&a[(row + subrow) * kRowsize + 0], + &b[0 * kRowsize + (col + subcol)], + kRowsize, kRowsize); +L1((uint64)&c[(row + subrow) * kRowsize + (col + subcol)]); +L2((uint64)&c[(row + subrow) * kRowsize + (col + subcol)]); +L3((uint64)&c[(row + subrow) * kRowsize + (col + subcol)]); + } + } + } + } +} + + +// Depends on c being zero'd on entry +void BlockMultiplyPtrUnrolled4(const double* a, const double* b, double* c) { + for (int row = 0; row < kRowsize; row += kBlocksize) { + for (int col = 0; col < kColsize; col += kBlocksize) { + // Calculate an 8x8 subarray of c + for (int subrow = 0; subrow < kBlocksize; ++subrow) { + for (int subcol = 0; subcol < kBlocksize; ++subcol) { + c[(row + subrow) * kRowsize + (col + subcol)] += + VectorSum4(&a[(row + subrow) * kRowsize + 0], + &b[0 * kRowsize + (col + subcol)], + kRowsize, kRowsize); +L1((uint64)&c[(row + subrow) * kRowsize + (col + subcol)]); +L2((uint64)&c[(row + subrow) * kRowsize + (col + subcol)]); +L3((uint64)&c[(row + subrow) * kRowsize + (col + subcol)]); + } + } + } + } +} + + +// Copy an NxN subarray to linear addresses, spreading across all L1 cache sets +// 8x8 => 64*8 bytes = 512 bytes or 8 sequential cache 
lines +// 16x16 => 256*8 = 2048 bytes or 32 sequential cache lines +// 32x32 => 1024*8 = 8192 bytes or 128 sequential cache lines (two lines per set in i3 L1 cache) +void Remap(const double* x, double* xprime) { + int k = 0; + for (int row = 0; row < kRemapsize; ++row) { + for (int col = 0; col < kRemapsize; col += 4) { + xprime[k + 0] = x[row * kRowsize + col + 0]; + xprime[k + 1] = x[row * kRowsize + col + 1]; + xprime[k + 2] = x[row * kRowsize + col + 2]; + xprime[k + 3] = x[row * kRowsize + col + 3]; +L1((uint64)&xprime[k + 0]); +L1((uint64)&xprime[k + 1]); +L1((uint64)&xprime[k + 2]); +L1((uint64)&xprime[k + 3]); +L1((uint64)&x[row * kRowsize + col + 0]); +L1((uint64)&x[row * kRowsize + col + 1]); +L1((uint64)&x[row * kRowsize + col + 2]); +L1((uint64)&x[row * kRowsize + col + 3]); + +L2((uint64)&xprime[k + 0]); +L2((uint64)&xprime[k + 1]); +L2((uint64)&xprime[k + 2]); +L2((uint64)&xprime[k + 3]); +L2((uint64)&x[row * kRowsize + col + 0]); +L2((uint64)&x[row * kRowsize + col + 1]); +L2((uint64)&x[row * kRowsize + col + 2]); +L2((uint64)&x[row * kRowsize + col + 3]); + +L3((uint64)&xprime[k + 0]); +L3((uint64)&xprime[k + 1]); +L3((uint64)&xprime[k + 2]); +L3((uint64)&xprime[k + 3]); +L3((uint64)&x[row * kRowsize + col + 0]); +L3((uint64)&x[row * kRowsize + col + 1]); +L3((uint64)&x[row * kRowsize + col + 2]); +L3((uint64)&x[row * kRowsize + col + 3]); + + k += 4; + } + } +} + +// Copy all NxN subarrays to linear addresses +void RemapAll(const double* x, double* xprime) { + int k = 0; + for (int row = 0; row < kRowsize; row += kRemapsize) { + for (int col = 0; col < kColsize; col += kRemapsize) { + Remap(&x[row * kRowsize + col], &xprime[k]); + k += (kRemapsize * kRemapsize); + } + } +} + +// Copy an NxN subarray from linear addresses +void UnRemap(const double* xprime, double* x) { + int k = 0; + for (int row = 0; row < kRemapsize; ++row) { + for (int col = 0; col < kRemapsize; col += 4) { + x[row * kRowsize + col + 0] = xprime[k + 0]; + x[row * kRowsize + col 
+ 1] = xprime[k + 1]; + x[row * kRowsize + col + 2] = xprime[k + 2]; + x[row * kRowsize + col + 3] = xprime[k + 3]; +L1((uint64)&x[row * kRowsize + col + 0]); +L1((uint64)&x[row * kRowsize + col + 1]); +L1((uint64)&x[row * kRowsize + col + 2]); +L1((uint64)&x[row * kRowsize + col + 3]); +L1((uint64)&xprime[k + 0]); +L1((uint64)&xprime[k + 1]); +L1((uint64)&xprime[k + 2]); +L1((uint64)&xprime[k + 3]); + +L2((uint64)&x[row * kRowsize + col + 0]); +L2((uint64)&x[row * kRowsize + col + 1]); +L2((uint64)&x[row * kRowsize + col + 2]); +L2((uint64)&x[row * kRowsize + col + 3]); +L2((uint64)&xprime[k + 0]); +L2((uint64)&xprime[k + 1]); +L2((uint64)&xprime[k + 2]); +L2((uint64)&xprime[k + 3]); + +L3((uint64)&x[row * kRowsize + col + 0]); +L3((uint64)&x[row * kRowsize + col + 1]); +L3((uint64)&x[row * kRowsize + col + 2]); +L3((uint64)&x[row * kRowsize + col + 3]); +L3((uint64)&xprime[k + 0]); +L3((uint64)&xprime[k + 1]); +L3((uint64)&xprime[k + 2]); +L3((uint64)&xprime[k + 3]); + + k += 4; + } + } +} + +// Copy all NxN subarrays from linear addresses +void UnRemapAll(const double* xprime, double* x) { + int k = 0; + for (int row = 0; row < kRowsize; row += kRemapsize) { + for (int col = 0; col < kColsize; col += kRemapsize) { + UnRemap(&xprime[k], &x[row * kRowsize + col]); + k += (kRemapsize * kRemapsize); + } + } +} + +// Transpose matrix +void TransposeAll(const double* x, double* xprime) { + for (int row = 0; row < kRowsize; ++row) { + for (int col = 0; col < kColsize; ++col) { + xprime[col * kRowsize + row] = x[row * kRowsize + col]; +L1((uint64)&x[row * kRowsize + col]); +L2((uint64)&x[row * kRowsize + col]); +L3((uint64)&x[row * kRowsize + col]); +L1((uint64)&xprime[col * kRowsize + row]); +L2((uint64)&xprime[col * kRowsize + row]); +L3((uint64)&xprime[col * kRowsize + row]); + } + } +} + +// Transpose one block +void BlockTranspose(const double* x, double* xprime) { + for (int row = 0; row < kBlocksize; ++row) { + for (int col = 0; col < kBlocksize; col += 4) { + 
xprime[(col + 0) * kRowsize + row] = x[row * kRowsize + col + 0]; + xprime[(col + 1) * kRowsize + row] = x[row * kRowsize + col + 1]; + xprime[(col + 2) * kRowsize + row] = x[row * kRowsize + col + 2]; + xprime[(col + 3) * kRowsize + row] = x[row * kRowsize + col + 3]; + +L1((uint64)&x[row * kRowsize + col + 0]); +L2((uint64)&x[row * kRowsize + col + 0]); +L3((uint64)&x[row * kRowsize + col + 0]); +L1((uint64)&xprime[(col + 0) * kRowsize + row]); +L2((uint64)&xprime[(col + 0) * kRowsize + row]); +L3((uint64)&xprime[(col + 0) * kRowsize + row]); + +L1((uint64)&x[row * kRowsize + col + 1]); +L2((uint64)&x[row * kRowsize + col + 1]); +L3((uint64)&x[row * kRowsize + col + 1]); +L1((uint64)&xprime[(col + 1) * kRowsize + row]); +L2((uint64)&xprime[(col + 1) * kRowsize + row]); +L3((uint64)&xprime[(col + 1) * kRowsize + row]); + +L1((uint64)&x[row * kRowsize + col + 2]); +L2((uint64)&x[row * kRowsize + col + 2]); +L3((uint64)&x[row * kRowsize + col + 2]); +L1((uint64)&xprime[(col + 2) * kRowsize + row]); +L2((uint64)&xprime[(col + 2) * kRowsize + row]); +L3((uint64)&xprime[(col + 2) * kRowsize + row]); + +L1((uint64)&x[row * kRowsize + col + 3]); +L2((uint64)&x[row * kRowsize + col + 3]); +L3((uint64)&x[row * kRowsize + col + 3]); +L1((uint64)&xprime[(col + 3) * kRowsize + row]); +L2((uint64)&xprime[(col + 3) * kRowsize + row]); +L3((uint64)&xprime[(col + 3) * kRowsize + row]); + + } + } +} + +// Block Transpose matrix +void BlockTransposeAll(const double* x, double* xprime) { + for (int row = 0; row < kRowsize; row += kBlocksize) { + for (int col = 0; col < kColsize; col += kBlocksize) { + BlockTranspose(&x[row * kRowsize + col], &xprime[col * kRowsize + row]); + } + } +} + + +// Remap input arrays to spread Remap blocks across successive cache lines, +// multiply, then remap output +// Depends on c being zero'd on entry +void BlockMultiplyRemap(const double* a, const double* b, double* c) { + RemapAll(a, aa); + RemapAll(b, bb); +#if 1 + for (int row = 0; row < kRowsize; 
row += kRemapsize) { + for (int col = 0; col < kColsize; col += kRemapsize) { + // cc block starts at row * kRowsize + col * kRemapsize + double* ccptr = &cc[(row * kRowsize) + (col * kRemapsize)]; + + for (int k = 0; k < kRowsize; k += kRemapsize) { + // aa block starts at row * kRowsize + k * kRemapsize + // bb block starts at k * kRowsize + col * kRemapsize + const double* aaptr = &aa[(row * kRowsize) + (k * kRemapsize)]; + const double* bbptr = &bb[(k * kRowsize) + (col * kRemapsize)]; + + // Calculate an NxN subarray of c + int kk = 0; + for (int subrow = 0; subrow < kRemapsize; ++subrow) { + for (int subcol = 0; subcol < kRemapsize; ++subcol) { + ccptr[kk] += VectorSum4(&aaptr[subrow * kRemapsize + 0], + &bbptr[0 * kRemapsize + subcol], + kRemapsize, kRemapsize); +L1((uint64)&ccptr[kk]); +L2((uint64)&ccptr[kk]); +L3((uint64)&ccptr[kk]); + ++kk; + } + } + } + } + } +#endif + UnRemapAll(cc, c); // cc is in block-linear (remapped) order; convert back to row-major +} + + +// Transpose second input array to be in column-major order +void SimpleMultiplyTranspose(const double* a, const double* b, double* c) { + TransposeAll(b, bb); + for (int row = 0; row < kRowsize; ++row) { + for (int col = 0; col < kColsize; ++col) { + c[row * kRowsize + col] = VectorSum1(&a[row * kRowsize + 0], + &bb[col * kRowsize + 0], + kRowsize, 1); +L1((uint64)&c[row * kRowsize + col]); +L2((uint64)&c[row * kRowsize + col]); +L3((uint64)&c[row * kRowsize + col]); + } + } +} + +// Transpose second input array to be in column-major order +void SimpleMultiplyTransposeFast(const double* a, const double* b, double* c) { + BlockTransposeAll(b, bb); + for (int row = 0; row < kRowsize; ++row) { + for (int col = 0; col < kColsize; ++col) { + c[row * kRowsize + col] = VectorSum4(&a[row * kRowsize + 0], + &bb[col * kRowsize + 0], + kRowsize, 1); +L1((uint64)&c[row * kRowsize + col]); +L2((uint64)&c[row * kRowsize + col]); +L3((uint64)&c[row * kRowsize + col]); + } + } +} + + +double* PageAlign(double* p) { + double* p_local = p + 511; + *reinterpret_cast(&p_local) &= 
~0xfff; +//// fprintf(stdout, "%016llx %016llx\n", (uint64)p, (uint64)p_local); + return p_local; +} + +int main(int argc, const char** argv) { +kutrace::mark_a("alloc"); + double* abase = new double[kRowsize * kColsize + 512]; + double* bbase = new double[kRowsize * kColsize + 512]; + double* cbase = new double[kRowsize * kColsize + 512]; + double* a = PageAlign(abase); + double* b = PageAlign(bbase); + double* c = PageAlign(cbase); + double* aabase = new double[kRowsize * kColsize + 512]; + double* bbbase = new double[kRowsize * kColsize + 512]; + double* ccbase = new double[kRowsize * kColsize + 512]; + aa = PageAlign(aabase); + bb = PageAlign(bbbase); + cc = PageAlign(ccbase); + +kutrace::mark_a("init"); + SimpleInit(a); + SimpleInit(b); + InitTags(); + + // Test remap +kutrace::mark_a("remap"); + RemapAll(a, aa); + UnRemapAll(aa, c); + fprintf(stdout, "a sum=%18.9f\n", SimpleSum(a)); + fprintf(stdout, "aa sum=%18.9f\n", SimpleSum(aa)); + fprintf(stdout, "c sum=%18.9f\n", SimpleSum(c)); + fprintf(stdout, "%s\n", EqualArray(a, c) ? "Equal" : "Not equal"); + fprintf(stdout, "Remap Misses L1/L2/L3 %10lld %10lld %10lld\n", L1misses, L2misses, L3misses); + InitTags(); + + // Test transpose +kutrace::mark_a("trans"); + TransposeAll(b, bb); + TransposeAll(bb, c); + fprintf(stdout, "b sum=%18.9f\n", SimpleSum(b)); + fprintf(stdout, "bb sum=%18.9f\n", SimpleSum(bb)); + fprintf(stdout, "c sum=%18.9f\n", SimpleSum(c)); + fprintf(stdout, "%s\n", EqualArray(b, c) ? "Equal" : "Not equal"); + fprintf(stdout, "Transpose Misses L1/L2/L3 %10lld %10lld %10lld\n", L1misses, L2misses, L3misses); + InitTags(); + +kutrace::mark_a("btrans"); + BlockTransposeAll(b, bb); + BlockTransposeAll(bb, c); + fprintf(stdout, "b sum=%18.9f\n", SimpleSum(b)); + fprintf(stdout, "bb sum=%18.9f\n", SimpleSum(bb)); + fprintf(stdout, "c sum=%18.9f\n", SimpleSum(c)); + fprintf(stdout, "%s\n", EqualArray(b, c) ? 
"Equal" : "Not equal"); + fprintf(stdout, "BlockTranspose Misses L1/L2/L3 %10lld %10lld %10lld\n", L1misses, L2misses, L3misses); + InitTags(); + + +kutrace::mark_a("simp"); + TimeMe("SimpleMultiply ", SimpleMultiply, a, b, c); +kutrace::mark_a("simpc"); + TimeMe("SimpleMultiplyColumnwise ", SimpleMultiplyColumnwise, a, b, c); + +#if 0 + TimeMe("SimpleMultiplyUnrolled1 ", SimpleMultiplyUnrolled1, a, b, c); + TimeMe("SimpleMultiplyUnrolled2 ", SimpleMultiplyUnrolled2, a, b, c); + TimeMe("SimpleMultiplyUnrolled4 ", SimpleMultiplyUnrolled4, a, b, c); + TimeMe("PointerMultiplyUnrolled4 ", PointerMultiplyUnrolled4, a, b, c); + + ZeroInit(c); + TimeMe("BlockMultiply ", BlockMultiply, a, b, c); + ZeroInit(c); + TimeMe("BlockMultiplyPtrUnrolled4 ", BlockMultiplyPtrUnrolled4, a, b, c); +#endif + +kutrace::mark_a("simpt"); + TimeMe("SimpleMultiplyTranspose ", SimpleMultiplyTranspose, a, b, c); + ZeroInit(c); +kutrace::mark_a("simptf"); + TimeMe("SimpleMultiplyTransposeFast", SimpleMultiplyTransposeFast, a, b, c); + ZeroInit(c); +kutrace::mark_a("simpr"); + TimeMe("BlockMultiplyRemap ", BlockMultiplyRemap, a, b, c); + ZeroInit(c); +kutrace::mark_a("simp1"); + TimeMe("IGNORE SimpleMultiplyOne ", SimpleMultiplyOne, a, b, c); + + + delete[] ccbase; + delete[] bbbase; + delete[] aabase; + delete[] cbase; + delete[] bbase; + delete[] abase; + return 0; +} + diff --git a/book-user-code/memhog_1.cc b/book-user-code/memhog_1.cc new file mode 100644 index 000000000000..5647e2c653bd --- /dev/null +++ b/book-user-code/memhog_1.cc @@ -0,0 +1,29 @@ +// Fill 32KB L1 cache infinite loop +// Copyright 2021 Richard L. 
Sites + +#include +#include "basetypes.h" +#include "dclab_trace_lib.h" + +static const int kSize = 7 * 1024; + +int main (int argc, const char** argv) { + fprintf(stdout, "Starting memory L1 hog.\n"); + uint64* buffer = new uint64[kSize](); // zero-fill: the loop below only reads, so give it defined values + + uint64 sum = 0; + for (int k = 0; k < 100000000; ++k) { + for (int i = 0; i < kSize; ++i) { + sum += buffer[i]; + } + // Each pass is just 3-4 usec, so we get a lot of chatter marking every pass. + // Just do every fourth pass. + if ((k & 3) == 0) { + dclab_trace::mark_d((k & 255) + 1000); + } + } + + printf("sum %lu\n", sum); + return 0; +} + diff --git a/book-user-code/memhog_1_trace.cc b/book-user-code/memhog_1_trace.cc new file mode 100644 index 000000000000..6ecf6708ef28 --- /dev/null +++ b/book-user-code/memhog_1_trace.cc @@ -0,0 +1,45 @@ +// Fill 32KB L1 cache infinite loop +// Copyright 2021 Richard L. Sites + +// SHORT INSTRUMENTED VERSION + +#include +#include + +#include "basetypes.h" +#include "dclab_trace_lib.h" + +static const int kSize = 8 * 1024; // count of 8-byte U64s + +int main (int argc, const char** argv) { + fprintf(stdout, "Starting instrumented memory L1 hog.\n"); + uint64* buffer = new uint64[kSize]; + + //Exit immediately if the module is not loaded + if (!dclab_trace::test()) { + fprintf(stderr, "FAIL, module dclab_trace_mod.ko not loaded\n"); + return 0; + } + + // Executable image name, backscan for slash if any + const char* slash = strrchr(argv[0], '/'); + dclab_trace::go((slash == NULL) ? argv[0] : slash + 1); + + uint64 sum = 0; + for (int k = 0; k < 40000; ++k) { + for (int i = 0; i < kSize; ++i) { + sum += buffer[i]; + buffer[i] = sum; + } + // Each pass is just 3-4 usec, so we get a lot of chatter marking every pass. + // Just do every fourth pass. 
+ if (true || (k & 3) == 0) {dclab_trace::mark_d((k & 255) + 1000);} + } + + fprintf(stderr, "memhog_1_trace.trace written\n"); + dclab_trace::stop("memhog_1_trace.trace"); // DESIGN BUG: exits + + printf("sum %lu\n", sum); + return 0; +} + diff --git a/book-user-code/memhog_1k.cc b/book-user-code/memhog_1k.cc new file mode 100644 index 000000000000..8691322d2e83 --- /dev/null +++ b/book-user-code/memhog_1k.cc @@ -0,0 +1,29 @@ +// Fill 32KB L1 cache infinite loop +// Copyright 2021 Richard L. Sites + +#include +#include "basetypes.h" +#include "kutrace_lib.h" + +static const int kSize = 7 * 1024; + +int main (int argc, const char** argv) { + fprintf(stdout, "Starting memory L1 hog.\n"); + uint64* buffer = new uint64[kSize](); // zero-fill: the loop below only reads, so give it defined values + + uint64 sum = 0; + for (int k = 0; k < 100000000; ++k) { + for (int i = 0; i < kSize; ++i) { + sum += buffer[i]; + } + // Each pass is just 3-4 usec, so we get a lot of chatter marking every pass. + // Just do every tenth pass. + if ((k % 10) == 0) { + kutrace::mark_d(k % 1000); + } + } + + printf("sum %lu\n", sum); + return 0; +} + diff --git a/book-user-code/memhog_2.cc b/book-user-code/memhog_2.cc new file mode 100644 index 000000000000..0be4ecae3e3d --- /dev/null +++ b/book-user-code/memhog_2.cc @@ -0,0 +1,54 @@ +// Fill 128KB of L2 cache infinite loop +// Copyright 2021 Richard L. 
Sites + +#include +#include // nanosleep + +#include "basetypes.h" +#include "kutrace_lib.h" + +static const int kSize = 32 * 1024; // count of 8-byte U64s + +// Sleep for n milliseconds +void msleep(int msec) { + struct timespec ts; + ts.tv_sec = msec / 1000; + ts.tv_nsec = (msec % 1000) * 1000000; + nanosleep(&ts, NULL); +} + +static const int kIterations = 70 * 1000; // NOTE: 70K ~1msec per iter +double fdiv_wait(int iter) { + double divd = 123456789.0; + for (int k = 0; k < iter; ++k) { + for (int i = 0; i < kIterations; ++i) { + divd /= 1.0001; + divd /= 0.9999; + } + } + kutrace::mark_d(666); + return divd; // Make live +} + + +int main (int argc, const char** argv) { + fprintf(stdout, "Starting memory L2 hog.\n"); + uint64* buffer = new uint64[kSize]; + + uint64 sum = 0; + for (int k = 0; k < 100000000; ++k) { + for (int i = 0; i < kSize; ++i) { + sum += buffer[i]; + buffer[i] = sum; + } + kutrace::mark_d((k & 255) + 2000); + + // Wait for 1 msec between every 4 passes + // if ((k & 3) == 0) {msleep(1);} + if ((k & 3) == 0) {fdiv_wait(1);} + } + + printf("sum %llu\n", sum); // Make live + return 0; +} + diff --git a/book-user-code/memhog_3.cc b/book-user-code/memhog_3.cc new file mode 100644 index 000000000000..52836dfad76f --- /dev/null +++ b/book-user-code/memhog_3.cc @@ -0,0 +1,57 @@ +// Fill 4MB of L3 cache infinite loop +// Copyright 2021 Richard L. 
Sites +// +// compile with g++ -O2 memhog_3.cc kutrace_lib.cc -o memhog3 +// + +#include +#include // nanosleep + +#include "basetypes.h" +#include "kutrace_lib.h" + +static const int kSize = 512 * 1024; // 4MB, count of 8-byte U64s + +// Sleep for n milliseconds +void msleep(int msec) { + struct timespec ts; + ts.tv_sec = msec / 1000; + ts.tv_nsec = (msec % 1000) * 1000000; + nanosleep(&ts, NULL); +} + +static const int kIterations = 60 * 1000; // NOTE: 60K ~1msec per iter +double fdiv_wait(int iter) { + double divd = 123456789.0; + for (int k = 0; k < iter; ++k) { + for (int i = 0; i < kIterations; ++i) { + divd /= 1.0001; + divd /= 0.9999; + } + } + //kutrace::mark_d(666); + return divd; // Make live +} + + +int main (int argc, const char** argv) { + fprintf(stdout, "Starting memory L3 hog.\n"); + uint64* buffer = new uint64[kSize]; + + uint64 sum = 0; + for (int k = 0; k < 100000000; ++k) { + for (int i = 0; i < kSize; ++i) { + sum += buffer[i]; + buffer[i] = sum; + } + kutrace::mark_d(k % 1000); + + // Wait for 10 msec between every 10 passes + if ((k % 10) == 0) {msleep(10);} + ///if ((k % 10) == 0) {fdiv_wait();} + } + + printf("sum %llu\n", sum); // Make live + return 0; +} + diff --git a/book-user-code/memhog_ram.cc b/book-user-code/memhog_ram.cc new file mode 100644 index 000000000000..aea80f83f413 --- /dev/null +++ b/book-user-code/memhog_ram.cc @@ -0,0 +1,61 @@ +// OverFill 20MB of L3 cache infinite loop +// Copyright 2021 Richard L. 
Sites +// +// compile with g++ -O2 memhog_ram.cc kutrace_lib.cc -o memhog_ram +// + +#include +#include // atoi +#include // nanosleep + +#include "basetypes.h" +#include "kutrace_lib.h" + +static const int kSize = 5 * 512 * 1024; // 20MB, count of 8-byte U64s + +static int msec_wait = 20; + +// Sleep for n milliseconds +void msleep(int msec) { + struct timespec ts; + ts.tv_sec = msec / 1000; + ts.tv_nsec = (msec % 1000) * 1000000; + nanosleep(&ts, NULL); +} + +static const int kIterations = 60 * 1000; // NOTE: 60K ~5msec per iter +double fdiv_wait(int iter) { + double divd = 123456789.0; + for (int k = 0; k < iter; ++k) { + for (int i = 0; i < kIterations; ++i) { + divd /= 1.0001; + divd /= 0.9999; + } + } + return divd; // Make live +} + + +int main (int argc, const char** argv) { + if (1 < argc) {msec_wait = atoi(argv[1]);} + fprintf(stdout, "Starting memory RAM hog.\n"); + + uint64* buffer = new uint64[kSize]; + for (int i = 0; i < kSize; ++i) {buffer[i] = i;} + + uint64 sum = 0; + for (int k = 0; k < 10000; ++k) { // Approx 30 seconds + for (int i = 0; i < kSize; ++i) { + sum += buffer[i]; + buffer[i] = sum; + } + if ((k & 3) == 0) {kutrace::mark_d(k);} + + // Wait for ~20 msec between every 10 passes + if ((k % 10) == 0) {msleep(msec_wait);} + } + + printf("sum %llu\n", sum); // Make live + return 0; +} + diff --git a/book-user-code/mutex2.cc b/book-user-code/mutex2.cc new file mode 100644 index 000000000000..8a8f05af7da2 --- /dev/null +++ b/book-user-code/mutex2.cc @@ -0,0 +1,185 @@ +// Routines to deal with simple mutex lock +// Copyright 2021 Richard L. Sites + +#include +#include +#include // for syscall +#include // for SYS_xxx definitions +#include + +#include "basetypes.h" +#include "dclab_log.h" // for GetUsec() +#include "fancylock2.h" +#include "kutrace_lib.h" +#include "mutex2.h" +#include "timecounters.h" + +static const int SPIN_ITER = 8; +static const int SPIN_USEC = 5; + +// Global variable. 
This only has 0->1 transitions after startup and we don't +// mind losing a few of those, so no threading issues +// 16K bits in this array -- we don't use the two high bits of lnamehash +static uint64 lock_name_added[256] = { 0 }; + +void TraceLockName(uint16 lnamehash, const char* filename) { + // We are tracing. Add the name of this lock if not already added + uint64 bitmask = 1 << (lnamehash & 63); + int subscr = (lnamehash >> 6) & 255; + if ((lock_name_added[subscr] & bitmask) != 0) {return;} + + // Remember that we added the name (do this first to mostly avoid 2 threads bothering) + lock_name_added[subscr] |= bitmask; + // Add the lock name to the the KUtrace + uint64 temp[8]; + memset(temp, 0, 8 * sizeof(u64)); + memcpy((char*)(&temp[1]), filename, 22); + u64 wordlen = 4; + u64 n_with_length = KUTRACE_LOCKNAME + (wordlen * 16); + // Build the initial word + // T N ARG + temp[0] = (0l << 44) | (n_with_length << 32) | (lnamehash); + kutrace::DoControl(KUTRACE_CMD_INSERTN, (u64)&temp[0]); +fprintf(stderr, "Lock name[%04x] %s\n", lnamehash, (const char*)&temp[1]); +} + +// Spin a little until lock is available or enough usec pass +// Return true if still locked +bool AcquireSpin(int whoami, int64 start_acquire, FancyLock2::FancyLock2Struct* fstruct) { + bool old_locked = true; + do { + for (int i = 0; i < SPIN_ITER; ++i) { + if (fstruct->lock == 0) {break;} + Pause(); // Let any hyperthread in, allow reduced power, slow speculation + } + // Lock might be available (0) + // Try again to get the lock + old_locked = __atomic_test_and_set(&fstruct->lock, __ATOMIC_ACQUIRE); + if (!old_locked) {break;} + } while ((GetUsec() - start_acquire) <= SPIN_USEC); + return old_locked; +} + +// Wait until lock is available +// Return true if still locked (will always be false on return) +bool AcquireWait(int whoami, int64 start_acquire, FancyLock2::FancyLock2Struct* fstruct) { + bool old_locked = true; + + // Add us to the number of waiters (not spinners) + 
__atomic_add_fetch(&fstruct->waiters, 1, __ATOMIC_RELAXED); + + do { + // Do futex wait until lock is no longer held (!=1) + //kutrace::mark_c("wait"); + syscall(SYS_futex, &fstruct->lock, FUTEX_WAIT, 1, NULL, NULL, 0); + //kutrace::mark_c("/wait"); + + // Done futex waiting -- lock is at least temporarily available (0) + // Try again to get the lock + old_locked = __atomic_test_and_set(&fstruct->lock, __ATOMIC_ACQUIRE); + } while (old_locked); + + // Remove us from the number of waiters + __atomic_sub_fetch(&fstruct->waiters, 1, __ATOMIC_RELAXED); + + return old_locked; +} + +//---------------------------------------------------------------------------// +// Exported routines // +//---------------------------------------------------------------------------// + +// Acquire a lock, including a memory barrier to prevent hoisting loads +// fstruct->lock = 0 is available, fstruct->lock = 1 is held by someone else +// whoami is any non-negative ID except the largest int +// Returns number of usec spent acquiring +int32 Acquirelock(int whoami, FancyLock2* flock) { + FancyLock2::FancyLock2Struct* fstruct = &flock->fancy2struct_; + + //-----------------------------------// + // Quick try to get uncontended lock // + //-----------------------------------// + bool old_locked = __atomic_test_and_set(&fstruct->lock, __ATOMIC_ACQUIRE); + if (!old_locked) { + // Success. 
We got the lock with no contention + // Nonetheless there may be waiters outstanding who have not yet retried + // Some new waiters may arrive during or after this trace entry but they will + // generate noacquire entries + if (0 < fstruct->waiters) { + // Trace acquire event + uint64 words_added = kutrace::addevent(KUTRACE_LOCKACQUIRE, fstruct->lnamehash); + } + + fstruct->holder = whoami; // Positive = uncontended + kutrace::mark_d(0); // Microseconds to acquire + return 0; + } + + //-----------------------------------// + // Contended lock, we did 1=>1 above // + //-----------------------------------// + // Accumulate contended-acquire time + int64 start_acquire = GetUsec(); + // Trace contended-lock acquire failed event + uint64 words_added = kutrace::addevent(KUTRACE_LOCKNOACQUIRE, fstruct->lnamehash); + + // Add the lock name if tracing and not already added + if (words_added == 1) {TraceLockName(fstruct->lnamehash, fstruct->filename);} + + do { // Outermost do + old_locked = AcquireSpin(whoami, start_acquire, fstruct); + if (!old_locked) {break;} + + old_locked = AcquireWait(whoami, start_acquire, fstruct); + if (!old_locked) {break;} + } while (true); // Outermost do + + // We got the lock + // Success. We got the lock. 
Negative indicates contended acquire + fstruct->holder = ~whoami; // Bit-complement = contended + + // Trace contended lock-acquire success event + kutrace::addevent(KUTRACE_LOCKACQUIRE, fstruct->lnamehash); + // Accumulate contended-acquire time + int32 elapsed_acquire = (int32)(GetUsec() - start_acquire); + //-----------------------------------// + // End Contended lock // + //-----------------------------------// + + flock->IncrCounts(elapsed_acquire); + kutrace::mark_d(elapsed_acquire); // Microseconds to acquire + return elapsed_acquire; +} + +// Release a lock, including a memory barrier to prevent sinking stores +void Releaselock(FancyLock2* flock) { + FancyLock2::FancyLock2Struct* fstruct = &flock->fancy2struct_; + bool was_contended_acquire = (fstruct->holder < 0); + fstruct->holder = 0x80000000; + // Do 1=>0 + __atomic_clear(&fstruct->lock, __ATOMIC_RELEASE); + + if (was_contended_acquire || (0 < fstruct->waiters)) { + // Trace contended-lock free event + kutrace::addevent(KUTRACE_LOCKWAKEUP, fstruct->lnamehash); + // Wake up some (<=4) possible other futex waiters + //kutrace::mark_b("wake"); + syscall(SYS_futex, &fstruct->lock, FUTEX_WAKE, 4, NULL, NULL, 0); + //kutrace::mark_b("/wake"); + } +} + + +// The constructor acquires the lock and the destructor releases it. +// Thus, just declaring one of these in a block makes the block run *only* when +// holding the lock and then reliably release it at block exit +// whoami is any non-negative ID except the largest int +Mutex2::Mutex2(int whoami, FancyLock2* flock) { + flock_ = flock; + int32 usec = Acquirelock(whoami, flock_); // usec not used +} + +Mutex2::~Mutex2() { + Releaselock(flock_); +} + diff --git a/book-user-code/mutex2.h b/book-user-code/mutex2.h new file mode 100644 index 000000000000..83662b2acb93 --- /dev/null +++ b/book-user-code/mutex2.h @@ -0,0 +1,36 @@ +// Routines to deal with simple locks, using Gnu C intrinsics +// Copyright 2021 Richard L. 
Sites +// + +#ifndef __MUTEX2_H__ +#define __MUTEX2_H__ + +#include "basetypes.h" +#include "fancylock2.h" + +// See fancylock2.h +//#define DEFINE_FANCYLOCK2(name, waitus90ile, queue90ile) \ +// FancyLock name(__FILE__, __LINE__, waitus90ile, queue90ile) + +// The constructor for this acquires the lock and the destructor releases it. +// Thus, just declaring one of these in a block makes the block run *only* when +// holding the lock and then reliably release it at block exit +// whoami is any non-negative ID except the largest int +class Mutex2 { +public: + Mutex2(int whoami, FancyLock2* flock); // Acquire lock + ~Mutex2(); // Release lock + + // Acquire a lock, including a memory barrier to prevent hoisting loads + // Returns number of usec spent acquiring + //int32 Acquirelock(FancyLock2::FancyLock2Struct* fstruct); + + // Release a lock, including a memory barrier to prevent sinking stores + //void Releaselock(FancyLock2::FancyLock2Struct* fstruct); + + // Only data -- Just a pointer to 64-byte fancy lock structure + FancyLock2* flock_; +}; + +#endif // __MUTEX2_H__ + diff --git a/book-user-code/mystery0.cc b/book-user-code/mystery0.cc new file mode 100644 index 000000000000..8f604337c2df --- /dev/null +++ b/book-user-code/mystery0.cc @@ -0,0 +1,24 @@ +// Sample mystery program to measure how long an add takes. Flawed. +// Copyright 2021 Richard L. 
Sites + +#include +#include +#include +#include "timecounters.h" + +static const int kIterations = 1000 * 1000000; + +int main (int argc, const char** argv) { + uint64_t sum = 0; + + int64_t startcy = GetCycles(); + for (int i = 0; i < kIterations; ++i) { + sum += 1; + } + int64_t elapsed = GetCycles() - startcy; + + double felapsed = elapsed; + fprintf(stdout, "%d iterations, %lu cycles, %4.2f cycles/iteration\n", + kIterations, elapsed, felapsed / kIterations); + return 0; +} diff --git a/book-user-code/mystery1.cc b/book-user-code/mystery1.cc new file mode 100644 index 000000000000..f46713c2de18 --- /dev/null +++ b/book-user-code/mystery1.cc @@ -0,0 +1,30 @@ +// Sample mystery program to measure how long an add takes. Flawed. +// Copyright 2021 Richard L. Sites + +#include +#include +#include // time() +#include "timecounters.h" + +static const int kIterations = 1000 * 1000000; + +int main (int argc, const char** argv) { + uint64_t sum = 0; + + time_t t = time(NULL); // A number that the compiler does not know + int incr = t & 255; // Unknown increment 0..255 + + int64_t startcy = GetCycles(); + for (int i = 0; i < kIterations; ++i) { + sum += incr; + } + int64_t elapsed = GetCycles() - startcy; + double felapsed = elapsed; + + fprintf(stdout, "%d iterations, %lu cycles, %4.2f cycles/iteration\n", + kIterations, elapsed, felapsed / kIterations); + + // fprintf(stdout, "%lu %lu\n", t, sum); // Make sum live + + return 0; +} diff --git a/book-user-code/mystery1_x.cc b/book-user-code/mystery1_x.cc new file mode 100644 index 000000000000..188039c12770 --- /dev/null +++ b/book-user-code/mystery1_x.cc @@ -0,0 +1,30 @@ +// Sample mystery program to measure how long an add takes. Flawed. +// Copyright 2021 Richard L. 
Sites + +#include +#include +#include // time() +#include "timers.h" + +static const int kIterations = 1000 * 1000000; + +int main (int argc, const char** argv) { + uint64_t sum = 0; + + time_t t = time(NULL); // A number that the compiler does not know + int incr = t & 255; // Unknown increment 0..255 + + int64_t startcy = GetCounter(); + for (int i = 0; i < kIterations; ++i) { + sum += incr; + } + int64_t elapsed = GetCounter() - startcy; + double felapsed = elapsed; + + fprintf(stdout, "%d iterations, %lu cycles, %4.2f cycles/iteration\n", + kIterations, elapsed, felapsed / kIterations); + + // fprintf(stdout, "%lu %lu\n", t, sum); // Make sum live + + return 0; +} diff --git a/book-user-code/mystery2.cc b/book-user-code/mystery2.cc new file mode 100644 index 000000000000..7f8041a2477f --- /dev/null +++ b/book-user-code/mystery2.cc @@ -0,0 +1,357 @@ +// Little program to exercise caches and memory. Used to determine +// cache line size +// cache total size +// cache set associativity +// for each level of the cache +// +// Copyright 2021 Richard L. Sites +// +// General structure is to fill up the cache(s) with data we don't want, to +// force the next step to be all cache misses, then time accessing data +// we do want. In doing the timing, we need to protect against CPUs that +// issue multiple instructions per cycle, and CPUs that can have many loads +// outstanding at once, and CPUs that can prefetch cache line N+1 soon after +// line N is accessed. All these can give misleadingly low cache timings. +// +// There are three forms of the timing loop here: +// Form 1 does naive accessing of data and is subject to all three problems +// Form 2 builds a linear linked list of items in memory, so that each load +// from memory is dependent on the completion of the previous load. It avoids +// the first two problems but is sitll subject to prefetching distortion. +// Form 3 builds a scrambled linked list whose purpose is to defeat any +// prefetching hardware. 
+ + +#include +#include +#include +#include // for time() + +#include "basetypes.h" +#include "polynomial.h" +#include "timecounters.h" + +// We use a couple of fast pseudo-random generators that are based on standard +// cyclic redundancy check arithmetic. Look in Wikipedia for more information on +// CRC calculations. +// +// The particular CRC style we use is a left shift by one bit, followed by an +// XOR of a specific CRC bit pattern if the bit shifted out is 1. This mimics +// long-used hardware that feeds back the top bit to XOR taps at specific +// places in a shift register. Our use here is just to get a bunch of non-zero +// bit patterns that have poor correlation from one to the next. +// +// Rather than the simple +// if (highbit is 1) +// x = (x << 1) ^ bit_pattern +// else +// x = (x << 1) +// +// we make this calculation branch-free and therefore fast by ANDing the bit +// pattern against either 00000... or 11111..., depending on the value of the +// high bit. This is accomplished by an arithmetic right shift that sign- +// extends the input value. The entire expansion is four inline instructions: +// shift, shift, and, xor. +// + +// x must be of type uint8 +// #define POLY8 (0x1d) // From CRC-8-SAE J1850 +// #define POLYSHIFT8(x) ( ((x) << 1) ^ ((static_cast((x)) >> 7) & POLY8) ) +// #define POLYINIT8 (0xffu) + +// POLY8 is a crc-based calculation that cycles through 255 byte values, +// excluding zero so long as the initial value is non-zero. If the initial +// value is zero, it cycles at zero every time. 
+// +// Here are the values if started at POLYINIT8 = 0xff +// ff e3 db ab 4b 96 31 62 c4 95 37 6e dc a5 57 ae +// 41 82 19 32 64 c8 8d 07 0e 1c 38 70 e0 dd a7 53 +// a6 51 a2 59 b2 79 f2 f9 ef c3 9b 2b 56 ac 45 8a +// 09 12 24 48 90 3d 7a f4 f5 f7 f3 fb eb cb 8b 0b +// 16 2c 58 b0 7d fa e9 cf 83 1b 36 6c d8 ad 47 8e +// 01 02 04 08 10 20 40 80 1d 3a 74 e8 cd 87 13 26 +// 4c 98 2d 5a b4 75 ea c9 8f 03 06 0c 18 30 60 c0 +// 9d 27 4e 9c 25 4a 94 35 6a d4 b5 77 ee c1 9f 23 +// 46 8c 05 0a 14 28 50 a0 5d ba 69 d2 b9 6f de a1 +// 5f be 61 c2 99 2f 5e bc 65 ca 89 0f 1e 3c 78 f0 +// fd e7 d3 bb 6b d6 b1 7f fe e1 df a3 5b b6 71 e2 +// d9 af 43 86 11 22 44 88 0d 1a 34 68 d0 bd 67 ce +// 81 1f 3e 7c f8 ed c7 93 3b 76 ec c5 97 33 66 cc +// 85 17 2e 5c b8 6d da a9 4f 9e 21 42 84 15 2a 54 +// a8 4d 9a 29 52 a4 55 aa 49 92 39 72 e4 d5 b7 73 +// e6 d1 bf 63 c6 91 3f 7e fc e5 d7 b3 7b f6 f1 ff + + +static const int kPageSize = 4096; // Must be a power of two +static const int kPageSizeMask = kPageSize - 1; + +// Make an array bigger than any expected cache size +static const int kMaxArraySize = 40 * 1024 * 1024; + +// Minimum useful cache line size is twice sizeof(void*), 16 bytes +// Maximum useful cache line size is page size, assumed here to be 4KB +static const int kMinLgStride = 4; +static const int kMaxLgStride = 12; + +// We will read and write these pairs, allocated at different strides +struct Pair { + Pair* next; + int64 data; +}; + +// We use this to make variables live by never printing them, but make sure +// the compiler doesn't know that. 
+static time_t gNeverZero = 1; + +// Allocate a byte array of given size, aligned on a page boundary +// Caller will call free(rawptr) +uint8* AllocPageAligned(int bytesize, uint8** rawptr) { + int newsize = bytesize + kPageSizeMask; + *rawptr = reinterpret_cast(malloc(newsize)); + uintptr_t temp = reinterpret_cast(*rawptr); + temp = (temp + kPageSizeMask) & ~kPageSizeMask; + return reinterpret_cast(temp); +} + +// Zero a byte array +void ZeroAll(uint8* ptr, int bytesize) { + memset(ptr, 0, bytesize); +} + +// Fill byte array with non-zero pseudo-random bits +void PseudoAll(uint8* ptr, int bytesize) { + uint32* wordptr = reinterpret_cast(ptr); + int wordcount = bytesize >> 2; + uint32 x = POLYINIT32; + for (int i = 0; i < wordcount; ++i) { + *wordptr++ = x; + x = POLYSHIFT32(x); + } +} + +// In a byte array, create a linked list of Pairs, spaced by the given stride. +// Pairs are generally allocated near the front of the array first and near +// the end of the array last. The list will have floor(bytesize / bytestride) +// elements. The last element's next field is NULL and all the data fields are +// zero. +// +// ptr must be aligned on a multiple of sizeof(void*), i.e. 8 for a 64-bit CPU +// bytestride must be a multiple of sizeof(void*), and must be at least 16 +// +// If makelinear is true, the list elements are at offsets 0, 1, 2, ... times +// stride. If makelinear is false, the list elements are in a scrambled order +// that is intended to defeat any cache prefetching hardware. See the POLY8 +// discussion above. +// +// This routine is not intended to be particularly fast; it is called just once +// +// Returns a pointer to the first element of the list +// +Pair* MakeLongList(uint8* ptr, int bytesize, int bytestride, bool makelinear) { + // Make an array of 256 mixed-up offsets + // 0, ff, e3, db, ... 
7b, f6, f1 + int mixedup[256]; + // First element + mixedup[0] = 0; + // 255 more elements + uint8 x = POLYINIT8; + for (int i = 1; i < 256; ++i) { + mixedup[i] = x; + x = POLYSHIFT8(x); + } + + Pair* pairptr = reinterpret_cast(ptr); + int element_count = bytesize / bytestride; + // Make sure next element is in different DRAM row than current element + int extrabit = makelinear ? 0 : (1 << 14); + // Fill in N-1 elements, each pointing to the next one + for (int i = 1; i < element_count; ++i) { + // If not linear, there are mixed-up groups of 256 elements chained together + int nextelement = makelinear ? i : (i & ~0xff) | mixedup[i & 0xff]; + Pair* nextptr = reinterpret_cast(ptr + ((nextelement * bytestride) ^ extrabit)); + pairptr->next = nextptr; + pairptr->data = 0; + pairptr = nextptr; + } + // Fill in Nth element + pairptr->next = NULL; + pairptr->data = 0; + + return reinterpret_cast(ptr); +} + +// Read all the bytes +void TrashTheCaches(const uint8* ptr, int bytesize) { + // Fill up array with pseudo-random nonzero values + const uint64* uint64ptr = reinterpret_cast(ptr); + int wordcount = bytesize >> 3; + uint64 sum = 0; + for (int i = 0; i < wordcount; ++i) { + sum += uint64ptr[i]; + } + + // Make sum live so compiler doesn't delete the loop + if (gNeverZero == 0) {fprintf(stdout, "sum = %lld\n", sum);} +} + +int64 NaiveTiming(uint8* ptr, int bytesize, int bytestride) { + const Pair* pairptr = reinterpret_cast(ptr); + int pairstride = bytestride / sizeof(Pair); + int64 sum = 0; + + // Try to force the data we will access out of the caches + TrashTheCaches(ptr, bytesize); + + // Load 256 items spaced by stride + // May have multiple loads outstanding; may have prefetching + // Unroll four times to attempt to reduce loop overhead in timing + int64 startcy = GetCycles(); + for (int i = 0; i < 256; i += 4) { + sum += pairptr[0 * pairstride].data; + sum += pairptr[1 * pairstride].data; + sum += pairptr[2 * pairstride].data; + sum += pairptr[3 * pairstride].data; 
+ pairptr += 4 * pairstride; + } + int64 stopcy = GetCycles(); + int64 elapsed = stopcy - startcy; // cycles + + // Make sum live so compiler doesn't delete the loop + if (gNeverZero == 0) {fprintf(stdout, "sum = %lld\n", sum);} + + return elapsed >> 8; // cycles per load for 256 loads +} + +int64 LinkedTiming(uint8* ptr, int bytesize, int bytestride, bool makelinear) { + const Pair* pairptr = MakeLongList(ptr, bytesize, bytestride, makelinear); + + // Try to force the data we will access out of the caches + TrashTheCaches(ptr, bytesize); + + // Load 256 items in linked list + // May have prefetching + // Unroll four times to attempt to reduce loop overhead in timing + int64 startcy = GetCycles(); + for (int i = 0; i < 256; i += 4) { + pairptr = pairptr->next; + pairptr = pairptr->next; + pairptr = pairptr->next; + pairptr = pairptr->next; + } + int64 stopcy = GetCycles(); + int64 elapsed = stopcy - startcy; // cycles + + // Make final pairptr live so compiler doesn't delete the loop + if (gNeverZero == 0) {fprintf(stdout, "pairptr->data = %lld\n", pairptr->data);} + + return elapsed >> 8; // cycles per load for 256 loads +} + +int64 LinearTiming(uint8* ptr, int bytesize, int bytestride) { + bool makelinear = true; + return LinkedTiming(ptr, bytesize, bytestride, makelinear); +} + +int64 ScrambledTiming(uint8* ptr, int bytesize, int bytestride) { + bool makelinear = false; + return LinkedTiming(ptr, bytesize, bytestride, makelinear); +} + +int64 ScrambledLoads(const Pair* pairptr, int count) { + // Unroll four times to attempt to reduce loop overhead in timing + int64 startcy = GetCycles(); + for (int i = 0; i < (count >> 2); ++i) { + pairptr = pairptr->next; + pairptr = pairptr->next; + pairptr = pairptr->next; + pairptr = pairptr->next; + } + int64 stopcy = GetCycles(); + int64 elapsed = stopcy - startcy; // cycles + + // Make final pairptr live so compiler doesn't delete the loop + if (gNeverZero == 0) {fprintf(stdout, "pairptr->data = %lld\n", pairptr->data);} 
+ + return elapsed / count; +} + +void FindCacheSizes(uint8* ptr, int kMaxArraySize, int linesize) { + bool makelinear = false; + const Pair* pairptr = MakeLongList(ptr, kMaxArraySize, linesize, makelinear); + + // Load 16 to 512K cache lines and time it. 32MB cache / 64B linesize = 512K lines. + for (int lgcount = 4; lgcount <= 19; ++lgcount) { + int count = 1 << lgcount; + + // Try to force the data we will access out of the caches + TrashTheCaches(ptr, kMaxArraySize); + + // Repeat four times. First will take cache misses, others will not if small enough + fprintf(stdout, "lgcount[%d] load N cache lines, giving cy/ld. Repeat. ", lgcount); + for (int i = 0; i < 4; ++i) { + int64 cyclesperload = ScrambledLoads(pairptr, count); + fprintf(stdout, "%lld ", cyclesperload); + } + fprintf(stdout, "\n"); + } +} + +void FindCacheAssociativity(uint8* ptr, int kMaxArraySize, int linesize, int totalsize) { + // You get to fill this in. + fprintf(stdout, "FindCacheAssociativity(%d, %d) not implemented yet.\n", linesize, totalsize); +} + + +int main (int argc, const char** argv) { + // Since it is not January 1, 1970, the returned value won't be zero, but + // the compiler doesn't know that. 
+ gNeverZero = time(NULL); + + uint8* rawptr; + uint8* ptr = AllocPageAligned(kMaxArraySize, &rawptr); + + // Loop across various strides looking for cache line size + for (int lgstride = kMinLgStride; lgstride <= kMaxLgStride; ++lgstride) { + int stride = 1 << lgstride; + + // Do naive timing + int64 naive_cyclesperload = NaiveTiming(ptr, kMaxArraySize, stride); + + // Do linear linked list timing + int64 linear_cyclesperload = LinearTiming(ptr, kMaxArraySize, stride); + + // Do scrambled linked list timing + int64 Scrambled_cyclesperload = ScrambledTiming(ptr, kMaxArraySize, stride); + + fprintf(stdout, + "stride[%d] naive %lld cy/ld, linear %lld cy/ld, scrambled %lld cy/ld\n", + stride, + naive_cyclesperload, linear_cyclesperload, Scrambled_cyclesperload); + } + + // We will learn above that the cache line size is 64 bytes. Now look to see + // how big each cache level is. + // Strategy: Read N lines into the cache, then reread, looking at timings. + // Vary N to see what values exceed the size of each level of cache + + int linesize = 64; + FindCacheSizes(ptr, kMaxArraySize, linesize); + + + // We will learn that the cache sizes for L1 and L2 are 32KB and 256KB respectively, + // and for L3 either 4MB (dclab-1) or 3MB (dclab-2) + + int l1_total_size = 32 * 1024; + FindCacheAssociativity(ptr, kMaxArraySize, linesize, l1_total_size); + + int l2_total_size = 256 * 1024; + FindCacheAssociativity(ptr, kMaxArraySize, linesize, l2_total_size); + + int l3_total_size = 2048 * 1024; + FindCacheAssociativity(ptr, kMaxArraySize, linesize, l3_total_size); + + + free(rawptr); + return 0; +} + diff --git a/book-user-code/mystery23.cc b/book-user-code/mystery23.cc new file mode 100644 index 000000000000..6490c1f040fd --- /dev/null +++ b/book-user-code/mystery23.cc @@ -0,0 +1,148 @@ +// schedtest.cc +// Little program to observe scheduler choices +// Copyright 2021 Richard L. 
Sites +// +// compile with g++ -O2 -pthread mystery23.cc kutrace_lib.cc -o mystery23 + +#include +#include +#include +#include +#include +#include + +#include "basetypes.h" +#include "kutrace_lib.h" + +// From Jenkins hash +#define mix(a,b,c) \ +{ \ + a -= b; a -= c; a ^= (c>>13); \ + b -= c; b -= a; b ^= (a<<8); \ + c -= a; c -= b; c ^= (b>>13); \ + a -= b; a -= c; a ^= (c>>12); \ + b -= c; b -= a; b ^= (a<<16); \ + c -= a; c -= b; c ^= (b>>5); \ + a -= b; a -= c; a ^= (c>>3); \ + b -= c; b -= a; b ^= (a<<10); \ + c -= a; c -= b; c ^= (b>>15); \ +} + + +enum SchedType { + CFS = 0, + FIFO, + RR +}; + +/* Count is chosen to run main loop about 1 second */ +static const int kLOOPCOUNT = 8000; + +/* Size is chosen to fit into a little less thsan 256KB */ +static const int kSIZE = 64 * 960; /* 4-byte words */ + +/* Calculate a hash over s, some multiple of 12 bytes long */ +/* Length is count of 32-bit words */ +uint32 hash(uint32* s, uint32 length, uint32 initval) { + uint32 a,b,c,len; + + /* Set up the internal state */ + len = length; + a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */ + c = initval; /* the previous hash value */ + + /*---------------------------------------- handle most of the string */ + while (len >= 3) + { + a += s[0] ; + b += s[1]; + c += s[2]; + mix(a,b,c); + s += 3; len -= 3; + } + /*-------------------------------------------- report the result */ + return c; +} + + +/* Do some work for about a second */ +/* Return hashval to make it live; caller ignores */ +void* CalcLoop(void* unused_arg) { + //fprintf(stdout, " CalcLoop(%d)\n", *(int*)unused_arg); + + /* Simple arbitrary initialization */ + uint32 foo[kSIZE]; /* A little less than 256KB */ + for (int i = 0; i < kSIZE; ++i) {foo[i] = (i & 1023) * 1041667;} + + /* Main loop */ + volatile uint32 hashval = 0; + for (int i = 0; i < kLOOPCOUNT; ++i) { + hashval = hash(foo, kSIZE, hashval); + } + + return NULL; +} + + +void DoParallel(int n, SchedType schedtype) { + 
kutrace::mark_d(n); + //fprintf(stdout, "DoParallel(%d)\n", n); + pthread_t* thread_id = (pthread_t*)malloc(n * sizeof(pthread_t)); + /* Spawn n threads */ + for (int i = 0; i < n; ++i) { + pthread_attr_t attr; + struct sched_param sparam; + sparam.sched_priority = 1; + pthread_attr_init(&attr); + /* Defaults to CFS, called SCHED_OTHER */ + if (schedtype == FIFO) { + pthread_attr_setschedpolicy(&attr, SCHED_FIFO); + pthread_attr_setschedparam(&attr, &sparam); + pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); + } + if (schedtype == RR) { + pthread_attr_setschedpolicy(&attr, SCHED_RR); + pthread_attr_setschedparam(&attr, &sparam); + pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); + } + + int iret = pthread_create(&thread_id[i], NULL, CalcLoop, &i); + if (iret != 0) {fprintf(stderr, "pthread_create() error %d\n", iret);} + + pthread_attr_destroy(&attr); + } + + /* Wait for all n threads to finish */ + for (int i = 0; i < n; ++i) { + pthread_join(thread_id[i], NULL); + //fprintf(stdout, " ret[%d]\n", i); + } + + free(thread_id); + //fprintf(stdout, "\n"); + +}; + +void Usage() { + fprintf(stderr, "Usage: schedtest [-cfs(d) | -fifo | -rr]\n"); + exit(EXIT_FAILURE); +} + + +// Spawn different numbers of parallel threads +int main(int argc, const char** argv) { + SchedType schedtype = CFS; // default + for (int i = 1; i < argc; ++i) { + if (strcmp(argv[i], "-cfs") == 0) {schedtype = CFS;} + if (strcmp(argv[i], "-fifo") == 0) {schedtype = FIFO;} + if (strcmp(argv[i], "-rr") == 0) {schedtype = RR;} + } + + // Spawn 1..12 parallel processes + for (int n = 1; n <= 12; ++n) { + DoParallel(n, schedtype); + } + + exit(EXIT_SUCCESS); +} + diff --git a/book-user-code/mystery25.cc b/book-user-code/mystery25.cc new file mode 100644 index 000000000000..d4dfbec34d9a --- /dev/null +++ b/book-user-code/mystery25.cc @@ -0,0 +1,243 @@ +// Little program to time disk transfers +// Copyright 2021 Richard L. 
Sites + +// Usage: mystery25 +// Compile with +// g++ -O2 mystery25.cc kutrace_lib.cc -o mystery25 + +#include +#include +#include +#include +#include +#include +#include // gettimeofday +#include // lseek +#include +#include // lseek + +#include "basetypes.h" +#include "kutrace_lib.h" +#include "polynomial.h" +#include "timecounters.h" + +// Time on a slow 5400 rpm disk +// $ ./mystery25 /tmp/myst25.bin +// opening /tmp/myst25.bin for write +// write: 40.000MB 0.008sec 4741.022MB/sec +// sync: 40.000MB 0.836sec 47.835MB/sec +// read: 40.000MB 0.673sec 59.466MB/sec +// seq read: 40.000MB 1.470sec 27.203MB/sec +// rand read: 40.000MB 68.849sec 0.581MB/sec + +// Time on a cheap SSD +// $ ./mystery25 /datassd/dserve/myst25.bin +// opening /datassd/dserve/myst25.bin for write +// write: 40.000MB 0.013sec 3030.303MB/sec +// sync: 40.000MB 0.068sec 587.070MB/sec +// read: 40.000MB 0.057sec 706.003MB/sec +// seq read: 40.000MB 0.548sec 72.947MB/sec +// rand read: 40.000MB 0.909sec 43.985MB/sec + + +// Time on two disk files simultaneously +// $ ./mystery25 /tmp/myst25.bin & ./mystery25 /tmp/myst25a.bin +// [2] 3306 +// opening /tmp/myst25.bin for write +// opening /tmp/myst25a.bin for write +// write: 40.000MB 0.009sec 4236.839MB/sec +// write: 40.000MB 0.019sec 2158.312MB/sec +// sync: 40.000MB 1.762sec 22.704MB/sec +// sync: 40.000MB 1.753sec 22.822MB/sec +// read: 40.000MB 1.498sec 26.701MB/sec +// read: 40.000MB 1.525sec 26.234MB/sec +// seq read: 40.000MB 3.087sec 12.958MB/sec +// seq read: 40.000MB 3.276sec 12.210MB/sec +// rand read: 40.000MB 151.276sec 0.264MB/sec +// dsites@dclab-2:~/code$ rand read: 40.000MB 151.074sec 0.265MB/sec + +// $ ./mystery25 /tmp/myst25.bin & ./mystery25 /tmp/myst25a.bin +// opening /tmp/myst25.bin for write +// opening /tmp/myst25a.bin for write +// write: 40.000MB 0.009sec 4236.839MB/sec +// write: 40.000MB 0.019sec 2158.312MB/sec +// sync: 40.000MB 1.762sec 22.704MB/sec +// sync: 40.000MB 1.753sec 22.822MB/sec +// read: 40.000MB 
1.498sec 26.701MB/sec +// read: 40.000MB 1.525sec 26.234MB/sec +// seq read: 40.000MB 3.087sec 12.958MB/sec +// seq read: 40.000MB 3.276sec 12.210MB/sec +// rand read: 40.000MB 151.276sec 0.264MB/sec +// dsites@dclab-2:~/code$ rand read: 40.000MB 151.074sec 0.265MB/sec + +// Time on two SSD files simultaniously +// $ ./mystery25 /datassd/dserve/myst25.bin & ./mystery25 /datassd/dserve/myst25a.bin +// [2] 3479 +// opening /datassd/dserve/myst25.bin for write +// opening /datassd/dserve/myst25a.bin for write +// write: 40.000MB 0.010sec 4126.689MB/sec +// write: 40.000MB 0.016sec 2449.329MB/sec +// sync: 40.000MB 0.161sec 247.681MB/sec +// sync: 40.000MB 0.155sec 258.777MB/sec +// read: 40.000MB 0.109sec 368.636MB/sec +// read: 40.000MB 0.112sec 356.617MB/sec +// seq read: 40.000MB 0.944sec 42.363MB/sec +// seq read: 40.000MB 0.942sec 42.478MB/sec +// rand read: 40.000MB 0.971sec 41.176MB/sec +// rand read: 40.000MB 0.971sec 41.176MB/sec + +static const int kPageSize = 4096; // Must be a power of two +static const int kPageSizeMask = kPageSize - 1; + +// Make an array bigger than any expected cache size +static const int kMaxArraySize = 40 * 1024 * 1024; + + +// Order-of-magnitude times: +// One disk revolution at 7200 RPM = 8.33 msec +// One disk revolution at 5400 RPM = 11.11 msec +// If transfer rate is ~100 MB,sec, a track is ~1MB long, or 256 4KB blocks +// Time to transfer a single 4KB block is ~40 usec +// Seek time for big seek is perhaps 15 msec +// Seek time track-to-track is perhaps 5 msec + +// Allocate a byte array of given size, aligned on a page boundary +// Caller will call free(rawptr) +uint8* AllocPageAligned(int bytesize, uint8** rawptr) { + int newsize = bytesize + kPageSizeMask; + *rawptr = reinterpret_cast(malloc(newsize)); + uintptr_t temp = reinterpret_cast(*rawptr); + uintptr_t temp2 = (temp + kPageSizeMask) & ~kPageSizeMask; + return reinterpret_cast(temp2); +} + +// Fill byte array with non-zero pseudo-random bits +void PseudoAll(uint8* 
ptr, int bytesize) { + uint32* wordptr = reinterpret_cast(ptr); + int wordcount = bytesize >> 2; + uint32 x = POLYINIT32; + for (int i = 0; i < wordcount; ++i) { + *wordptr++ = x; + x = POLYSHIFT32(x); + } +} + +// Set buffer to pseudo-random non-zero bytes +void InitAll(uint8* ptr, int size) { + kutrace::mark_b("init"); + PseudoAll(ptr, kMaxArraySize); + kutrace::mark_b("/init"); +} + +void WriteAll(const char* filename, uint8* ptr, int size) { + kutrace::mark_a("write"); + fprintf(stdout, "opening %s for write\n", filename); + int fd = open(filename, O_WRONLY | O_CREAT, S_IRWXU); + if (fd < 0) {perror(" FAILED write open"); exit(0);} + FILE* f = fdopen(fd, "wb"); + int64 startusec = GetUsec(); + ssize_t ignoreme = fwrite(ptr, 1, size, f); + int64 elapsedusec = GetUsec() - startusec; + fclose(f); + double mb = size / 1048576.0; + double sec = elapsedusec / 1000000.0; + fprintf(stdout, " write: %6.2fMB %6.3fsec %6.2f MB/sec\n", mb, sec, mb/sec); + kutrace::mark_a("/write"); +} + +void SyncAll(int size) { + kutrace::mark_b("sync"); + int64 startusec = GetUsec(); + sync(); + int64 elapsedusec = GetUsec() - startusec; + double mb = size / 1048576.0; + double sec = elapsedusec / 1000000.0; + fprintf(stdout, " sync: %6.2fMB %6.3fsec %6.2f MB/sec\n", mb, sec, mb/sec); + kutrace::mark_b("/sync"); +} + +void ReadAll(const char* filename,uint8* ptr, int size) { + kutrace::mark_a("read"); + int fd = open(filename, O_RDONLY | O_DIRECT); + if (fd < 0) {perror(" FAILED read open"); exit(0);} + FILE* f = fdopen(fd, "rb"); + int64 startusec = GetUsec(); + ssize_t ignoreme = fread(ptr, 1, size, f); + int64 elapsedusec = GetUsec() - startusec; + fclose(f); + double mb = size / 1048576.0; + double sec = elapsedusec / 1000000.0; + fprintf(stdout, " read: %6.2fMB %6.3fsec %6.2f MB/sec\n", mb, sec, mb/sec); + kutrace::mark_a("/read"); +} + +void ReadSeq(const char* filename,uint8* ptr, int size) { + int blkcount = size >> 12; + kutrace::mark_b("seq"); + int fd = open(filename, O_RDONLY 
| O_DIRECT); + if (fd < 0) {perror(" FAILED read open"); exit(0);} + FILE* f = fdopen(fd, "rb"); + int64 startusec = GetUsec(); + for (int i = 0; i < blkcount; ++i) { + ssize_t ignoreme = fread(ptr, 1, 1 <<12, f); + } + int64 elapsedusec = GetUsec() - startusec; + fclose(f); + double mb = size / 1048576.0; + double sec = elapsedusec / 1000000.0; + fprintf(stdout, " seq read: %6.2fMB %6.3fsec %6.2f MB/sec\n", mb, sec, mb/sec); + kutrace::mark_b("/seq"); +} + +void ReadRand(const char* filename,uint8* ptr, int size) { + int blkcount = size >> 12; + uint32 x = POLYINIT32; + kutrace::mark_a("rand"); + int fd = open(filename, O_RDONLY | O_DIRECT); + if (fd < 0) {perror(" FAILED read open"); exit(0);} + FILE* f = fdopen(fd, "rb"); + int64 startusec = GetUsec(); + for (int i = 0; i < blkcount; ++i) { + int j = x % blkcount; + x = POLYSHIFT32(x); + fseek(f, j << 12, SEEK_SET); + ssize_t ignoreme = fread(ptr, 1, 1 <<12, f); + } + int64 elapsedusec = GetUsec() - startusec; + fclose(f); + double mb = size / 1048576.0; + double sec = elapsedusec / 1000000.0; + fprintf(stdout, " rand read: %6.2fMB %6.3fsec %6.2f MB/sec\n", mb, sec, mb/sec); + kutrace::mark_a("/rand"); +} + +void Usage() { + fprintf(stderr, "Usage: mystery3 \n"); +} + +int main (int argc, const char** argv) { + if (argc < 2) {Usage(); return 0;} + const char* filename = argv[1]; + + kutrace::msleep(100); // Wait 100 msec so we might start on an idle CPU + + // Allocate a 40MB array aligned on a 4KB boundary + uint8* rawptr; + uint8* ptr = AllocPageAligned(kMaxArraySize, &rawptr); + + // Set buffer to pseudo-random non-zero bytes + InitAll(ptr, kMaxArraySize); + WriteAll(filename, ptr, kMaxArraySize); + // Get it really out to disk + SyncAll(kMaxArraySize); + ReadAll(filename, ptr, kMaxArraySize); + ReadSeq(filename, ptr, kMaxArraySize); + ReadRand(filename, ptr, kMaxArraySize); + + free(rawptr); + return 0; +} + + + + diff --git a/book-user-code/mystery27.cc b/book-user-code/mystery27.cc new file mode 100644 
index 000000000000..5c9c88788351 --- /dev/null +++ b/book-user-code/mystery27.cc @@ -0,0 +1,453 @@ +// Little program to use locks +// Copyright 2021 Richard L. Sites +// +// +// We would like to run for a few hundred msec total, with perhaps 1000-10000 work +// calls and 10-ish debug calls so maybe wait 20 msec, total run 250-300 msec +// 3 threads doing updates each doing about 3000 calls in 200 msec = 70 usec each +// complain if over 100 usec +// debug to use at least 200 usec and 1 msec better +// +// compile with g++ -O2 -pthread mystery27.cc fancylock2.cc mutex2.cc kutrace_lib.cc dclab_log.cc -o mystery27 +// +// Command-line options: +// -smallwork -nowork control how much fake work is done by worker-threads holding the locks +// -dash0 -dash1 -dash2 -dash3 control which locking style is usedby dashbaord threads + +#include +#include +#include +#include +#include +#include +#include +#include // nanosleep +#include // read() +//#include +//#include /* superset of previous */ +//#include +//#include +//#include + +#include + +#include "basetypes.h" +#include "dclab_log.h" // for GetUsec() +#include "fancylock2.h" +#include "kutrace_lib.h" +#include "mutex2.h" +#include "polynomial.h" +#include "timecounters.h" + +using std::string; + +#define handle_error_en(en, msg) \ + do { errno = en; perror(msg); exit(EXIT_FAILURE); } while (0) + +static const int MAX_ACCOUNTS = 100; + +static const int EXTRA_DASHBOARD_USEC = 500; +static const int FAKEWORK_ITER = 140; +static const int WORKER_ITER = 10000; +static const int DASHBOARD_ITER = 50; + + +enum ActionType { + Deposit = 0, + Getcash, + Debitcard, + Balance +}; + +static const char* kActionName[4] = {"Deposit", "Getcash", "Debitcard", "Balance"}; +static const char* kActionNameStart[4] = {"depo", "cash", "debit", "bal"}; +static const char* kActionNameEnd[4] = {"/depo", "/cash", "/debit", "/bal"}; + +static const ActionType kActionFreq[16] = { + Deposit, Getcash, Getcash, Debitcard, + Debitcard, Debitcard, 
Balance, Balance, + Balance, Balance, Balance, Balance, + Balance, Balance, Balance, Balance, +}; + +static const bool debugging = true; + +typedef struct { + int incr_count; + int decr_count; + double balance; + double other; +} Xdata; + +typedef struct { + int type; + int account; + double amount; + int fake_work_usec; + int pad; +} Action; + +// The shared database +typedef struct { + FancyLock2* readerlock[2]; // Two locks, for subscripting by account# low bit + FancyLock2* writerlock[2]; + Xdata bankbalance; + Xdata accounts[MAX_ACCOUNTS]; +} Database; + +typedef struct { /* Used as argument to created threads */ + pthread_t thread_id; /* ID returned by pthread_create() */ + int thread_num; /* Application-defined thread # */ + Database* db; /* Shared data */ +} ThreadInfo; + +typedef void DashProc(int whoami, Database* db); + +// +// ---- Globals +// + +// Readers here are mutually exclusive but quick +// We expect the reader lock to take no more than 50 usec to acquire, 90% of the time +DEFINE_FANCYLOCK2(global_readerlock, 50); + +// Writers are mutually exclusive and may take a while +// We expect the writer lock to take no more than 100 usec to acquire, 90% of the time +DEFINE_FANCYLOCK2(global_writerlock, 100); + +// A second locks for experiment in spreadng locking around based on low bit +// of account number. +DEFINE_FANCYLOCK2(global_readerlock2, 50); +DEFINE_FANCYLOCK2(global_writerlock2, 100); + + +static bool alwaysfalse; // Assigned in main + +// Command-line parameters set these +static int workshift; // 0 shift gives 0..255 usec delay +static int workmul; // 1 gives WORKER_ITER iterations,2 2x, etc. +static bool lockbal; // If true, take out the lock for balance transactions +static int lockmod; // Mask. 0 = single locks, 1 = multiple locks based on low bit of account# +static int nocapture; // If true, reduce lock capture by waiting between acq +static DashProc* dashproc; // One of several dashboard procs. 
default: lock for ~500 usec
+
+// ---- End globals
+
+// Sleep the calling thread for msec milliseconds (returns at once for 0).
+// The result of nanosleep() is ignored, so a sleep cut short by a signal
+// is not resumed.
+void WaitMsec(int msec) {
+  if (msec == 0) {return;}
+  struct timespec req;
+  req.tv_sec = msec / 1000;
+  req.tv_nsec = (msec % 1000) * 1000000;
+  nanosleep(&req, NULL);
+}
+
+
+// Do some low-issue-rate bogus work for approximately N microseconds:
+// FAKEWORK_ITER pairs of dependent floating-point divides per microsecond.
+void DoFakeWork(int usec) {
+  double bogus = alwaysfalse ? 1.0 : 3.0;
+  for (int i = 0; i < usec; ++i) {
+    for (int n = 0; n < FAKEWORK_ITER; ++n) {
+      bogus /= 0.999999999;
+      bogus /= 1.000000001;
+    }
+  }
+  // Never executed; it only makes the result observable so the loop is kept
+  if (alwaysfalse) {printf("%f", bogus);}  // Keep live
+}
+
+// Debug aid: print one action to f, prefixed by the current time
+// (seconds within the minute, then microseconds) and the thread number.
+void dumpaction(FILE* f,int whoami, const Action* action) {
+  int64 now = GetUsec();
+  fprintf(f, "%02lld.%06llds [%d] Action %s(%d) $%5.2f %dus\n",
+          (now / 1000000) % 60, now % 1000000, whoami,
+          kActionName[action->type], action->account, action->amount, action->fake_work_usec);
+}
+
+// Little generator called by multiple threads,
+// each with its own random# base on the stack.
+// Advances *rand one step and derives every field of *action from that value:
+//   bits 0..3   action type, via the kActionFreq frequency table
+//   bits 4..    account number, modulo MAX_ACCOUNTS
+//   bits 8..23  amount in cents
+//   bits 24..31 fake work in usec, scaled down by workshift
+void MakeAction(Action* action, uint32* rand) {
+  uint32 x;
+  *rand = x = POLYSHIFT32(*rand);
+  memset(action, 0, sizeof(Action));
+  action->type = kActionFreq[x & 15];
+  action->account = (x >> 4) % MAX_ACCOUNTS;
+  action->amount = ((x >> 8) & 0xFFFF) / 100.0;  // $0.00 to 655.35
+  action->fake_work_usec = (x >> 24) & 0xFF;  // 0 to 255 usec
+  action->fake_work_usec >>= workshift;
+  switch (action->type) {
+  case Deposit:
+    break;
+  case Getcash:
+    // Multiple of $20.00, 2/5 of full range
+    action->amount = - floor(action->amount / 50.00) * 20.00;
+    break;
+  case Debitcard:
+    // Multiple of $1, 1/5 of full range
+    action->amount = - floor(action->amount / 5.00);
+    break;
+  case Balance:
+    // No amount, half as much fake work
+    action->amount = 0.0;
+    action->fake_work_usec >>= 1;
+    break;
+  default:
+    break;
+  }
+}
+
+// Apply amount to one balance record and count it as an increment
+// (amount >= 0) or a decrement. Does no locking of its own.
+// NOTE(review): a withdrawal that floors to zero arrives as -0.0, which
+// compares >= 0.0 and is therefore counted as an increment.
+void Update(Xdata* xdata, double amount) {
+  if (amount >= 0.0) {
+    ++xdata->incr_count;
+  } else {
+    ++xdata->decr_count;
+  }
+  xdata->balance += amount;
+}
+
+// Return the balance of one record. Does no locking of its own.
+double ReadBalance(const Xdata* xdata) {
+  return xdata->balance;
+}
+
+// Carry out one action against the shared database.
+// locknum picks the reader/writer lock pair from the account number masked
+// by lockmod (always pair 0 when lockmod is 0).
+// NOTE(review): with a non-zero lockmod the bank total is updated under the
+// per-account pair only, so updates arriving through different pairs do not
+// exclude each other.
+void DoAction(int whoami, const Action* action, Database* db) {
+  double balance;
+  int locknum = action->account & lockmod;
+  switch (action->type) {
+  case Deposit:
+  case Getcash:
+  case Debitcard:
+    {
+      // Take out both locks
+      Mutex2 lock1(whoami, db->readerlock[locknum]);
+      Mutex2 lock2(whoami, db->writerlock[locknum]);
+      Update(&db->accounts[action->account], action->amount);
+      Update(&db->bankbalance, action->amount);
+      DoFakeWork(action->fake_work_usec);
+    }
+    // Reduce odds of lock capture by delaying ~10 usec after freeing locks
+    if (nocapture) {DoFakeWork(10);}
+    break;
+  case Balance:
+    if (lockbal) {
+      // Take out just the reader lock
+      Mutex2 lock1(whoami, db->readerlock[locknum]);
+      // Ignore the balance but make it live
+      balance = ReadBalance(&db->accounts[action->account]);
+      if (alwaysfalse) {printf("%f", balance);}
+      DoFakeWork(action->fake_work_usec);
+    } else {
+      // No lock at all
+      // Ignore the balance but make it live
+      balance = ReadBalance(&db->accounts[action->account]);
+      if (alwaysfalse) {printf("%f", balance);}
+      DoFakeWork(action->fake_work_usec);
+    }
+    // Reduce odds of lock capture by delaying ~10 usec after freeing locks
+    if (lockbal && nocapture) {DoFakeWork(10);}
+    break;
+  default:
+    DoFakeWork(action->fake_work_usec);
+    break;
+  }
+}
+
+/***
+dashboard output fake html
+
+
+
+  Account xxxx, incr xxxx, decr xxxx, balance xxxx
+  Account xxxx, incr xxxx, decr xxxx, balance xxxx
+  Bank total    incr xxxx, decr xxxx, balance xxxx
+
+
+
+***/
+
+// Format every account with a non-zero balance, followed by the bank total,
+// into one string, then burn EXTRA_DASHBOARD_USEC of fake work.
+// Takes no locks itself; the caller decides what is held while this runs.
+// NOTE(review): the first and last string literals look truncated in this
+// copy (the markup sketched in the comment above appears to have been
+// lost) -- confirm against the original file.
+string BuildDashboardString(const Database* db) {
+  char buffer[256];
+  string s;
+  s += "\n";
+  s += "Dashboard\n";
+  for (int i = 0; i < MAX_ACCOUNTS; ++i) {
+    if (db->accounts[i].balance != 0.00) {
+      snprintf(buffer, sizeof(buffer), "account %04d deposits %4d, withdrawls %4d, balance %8.2f\n",
+        i, db->accounts[i].incr_count, db->accounts[i].decr_count, db->accounts[i].balance);
+      s += buffer;
+    }
+  }
+  snprintf(buffer, sizeof(buffer), "Bank Total   deposits %4d, withdrawls %4d, balance %8.2f\n",
+    db->bankbalance.incr_count, db->bankbalance.decr_count, db->bankbalance.balance);
+  // Previously the bank-total row was formatted and then dropped
+  s += buffer;
+  s += "  \n";
+
+  DoFakeWork(EXTRA_DASHBOARD_USEC);
+  return s;
+}
+
+// Dashboard variant: build (and print, when debugging) with no locks held.
+void NoLockDebugDashboard(int whoami, Database* db) {
+  string s = BuildDashboardString(db);
+  if (debugging) {
+    fprintf(stdout, "%s\n", s.c_str());
+  }
+}
+// Dashboard variant: hold all four locks while building and printing.
+void DoDebugDashboard(int whoami, Database* db) {
+  // Take out all locks
+  Mutex2 lock1(whoami, db->readerlock[0]);
+  Mutex2 lock2(whoami, db->readerlock[1]);
+  Mutex2 lock3(whoami, db->writerlock[0]);
+  Mutex2 lock4(whoami, db->writerlock[1]);
+  string s = BuildDashboardString(db);
+  if (debugging) {
+    fprintf(stdout, "%s\n", s.c_str());
+  }
+}
+
+// Dashboard variant: as DoDebugDashboard, but return before touching any
+// lock when debugging is off.
+void BetterDebugDashboard(int whoami, Database* db) {
+  if (!debugging) {return;}
+
+  {
+    // Take out all locks
+    Mutex2 lock1(whoami, db->readerlock[0]);
+    Mutex2 lock2(whoami, db->readerlock[1]);
+    Mutex2 lock3(whoami, db->writerlock[0]);
+    Mutex2 lock4(whoami, db->writerlock[1]);
+    string s = BuildDashboardString(db);
+    fprintf(stdout, "%s\n", s.c_str());
+  }
+}
+
+// Dashboard variant: hold the locks only while copying the database, then
+// build and print from the private copy with nothing held.
+void EvenBetterDebugDashboard(int whoami, Database* db) {
+  if (!debugging) {return;}
+  Database db_copy;
+
+  kutrace::mark_a("copy");
+  {
+    // Take out all locks
+    Mutex2 lock1(whoami, db->readerlock[0]);
+    Mutex2 lock2(whoami, db->readerlock[1]);
+    Mutex2 lock3(whoami, db->writerlock[0]);
+    Mutex2 lock4(whoami, db->writerlock[1]);
+    db_copy = *db;
+    // Free all four locks on block exit
+  }
+  kutrace::mark_a("/copy");
+
+  string s = BuildDashboardString(&db_copy);
+  fprintf(stdout, "%s\n", s.c_str());
+}
+
+
+// Point the database at the global locks and zero every balance record.
+void DbInit(Database* db) {
+  db->readerlock[0] = &global_readerlock;  // Multiple reader locks
+  db->readerlock[1] = &global_readerlock2;
+  db->writerlock[0] = &global_writerlock;
+  db->writerlock[1] = &global_writerlock2;
+  memset(&db->bankbalance, 0, sizeof(Xdata));
+  memset(&db->accounts[0], 0, MAX_ACCOUNTS * sizeof(Xdata));
+}
+
+// Thread body: generate and perform WORKER_ITER * workmul actions, each
+// bracketed by kutrace start/end marks. arg is this thread's ThreadInfo.
+// Every worker starts its generator from the same POLYINIT32 value.
+void* worker_thread(void* arg) {
+  ThreadInfo* tinfo = (ThreadInfo*)(arg);
+  int whoami = tinfo->thread_num;
+  fprintf(stdout, "\nWorker thread %d started\n", whoami);
+  Action action;
+  uint32 rand = POLYINIT32;
+  int count = WORKER_ITER * workmul;
+  for (int i = 0; i < count; ++i) {
+    MakeAction(&action, &rand);
+////dumpaction(stdout, whoami, &action);
+    kutrace::mark_a(kActionNameStart[action.type]);
+    DoAction(whoami, &action, tinfo->db);
+    kutrace::mark_a(kActionNameEnd[action.type]);
+    // Indicate progress
+    if (((i + 1) % 1000) == 0) {fprintf(stderr, "worker[%d] %4d\n", whoami, i+1);}
+  }
+  fprintf(stdout, "\nWorker thread %d finished\n", whoami);
+  return NULL;
+}
+
+// Thread body: DASHBOARD_ITER times, wait 20 msec and then run the dashboard
+// procedure selected on the command line. arg is this thread's ThreadInfo.
+void* dashboard_thread(void* arg) {
+  ThreadInfo* tinfo = (ThreadInfo*)(arg);
+  int whoami = tinfo->thread_num;
+  fprintf(stdout, "\nDashboard thread %d started\n", whoami);
+  for (int i = 0; i < DASHBOARD_ITER; ++i) {
+    WaitMsec(20);
+    (*dashproc)(whoami, tinfo->db);
+    // Indicate progress
+    if (((i + 1) % 10) == 0) {fprintf(stderr, "dashboard[%d] %4d\n", whoami, i+1);}
+  }
+  fprintf(stdout, "\nDashboard thread %d finished\n", whoami);
+  return NULL;
+}
+
+// Use this to see if usec delay is in the ballpark.
+// Early use might happen with a slow CPU clock, so also try later after CPUs are warmed up
+// Times DoFakeWork(1000) with GetUsec() and prints the elapsed usec to stdout.
+void CheckFakeWork() {
+  int64 start = GetUsec();
+  DoFakeWork(1000);
+  int64 elapsed = GetUsec() - start;
+  fprintf(stdout,"DoFakeWork(1000) took %lld usec\n", elapsed);
+}
+
+// Print the command-line synopsis to stderr and terminate the program.
+// Only reached for a malformed command line, so report failure to the caller.
+void Usage() {
+  fprintf(stderr, "Usage: mystery27 [-smallwork | -nowork] [-nolockbal] [-multilock] [-nocapture] [-dash0 | -dash1 | -dash2 | -dash3]\n");
+  exit(EXIT_FAILURE);
+}
+
+// Parse the options, start three worker threads and one dashboard thread
+// on a shared database, and wait for all four to finish.
+int main(int argc, const char** argv) {
+  alwaysfalse = (time(NULL) == 0);  // Never true but the compiler doesn't know that
+  Database db;
+  DbInit(&db);
+  CheckFakeWork();
+
+  // Command-line parameters, if any
+  workshift = 0;  // 0..255 usec delay
+  workmul = 1;
+  lockbal = true;
+  lockmod = 0;
+  nocapture = false;
+  dashproc = &DoDebugDashboard;
+  for (int i = 1; i < argc; ++i) {
+    if (argv[i][0] != '-') {Usage();}
+    if (strcmp(argv[i], "-smallwork") == 0) {workshift = 3; workmul = 1;}  // 0..15 usec delay
+    else if (strcmp(argv[i], "-nowork") == 0) {workshift = 8; workmul = 2;}  // 0 usec delay
+    else if (strcmp(argv[i], "-nolockbal") == 0) {lockbal = false;}  // No lock for balance transactions
+    else if (strcmp(argv[i], "-multilock") == 0) {lockmod = 1;}  // Use multiple reader locks
+    else if (strcmp(argv[i], "-nocapture") == 0) {nocapture = true;}  // Delay before re-acquire of locks
+    else if (strcmp(argv[i], "-dash0") == 0) {dashproc = &NoLockDebugDashboard;}  // No dashboard locks at all
+    else if (strcmp(argv[i], "-dash1") == 0) {dashproc = &DoDebugDashboard;}  // Long lock
+    else if (strcmp(argv[i], "-dash2") == 0) {dashproc = &BetterDebugDashboard;}  // Early out long lock
+    else if (strcmp(argv[i], "-dash3") == 0) {dashproc = &EvenBetterDebugDashboard;}  // Lock just copying
+    else {Usage();}
+  }
+
+  // Launch several worker threads that update some shared data
+  ThreadInfo tinfo[4];
+  for (int tnum = 0; tnum < 3; tnum++) {
+    tinfo[tnum].thread_num = tnum;
+    tinfo[tnum].db = &db;
+    int s = pthread_create(&tinfo[tnum].thread_id, NULL, &worker_thread, &tinfo[tnum]);
+    if (s != 0) {handle_error_en(s, "pthread_create");}
+  }
+
+  // Launch a dashboard thread that reads the shared data
+  for (int tnum = 3; tnum < 4; tnum++) {
+    tinfo[tnum].thread_num = tnum;
+    tinfo[tnum].db = &db;
+    int s = pthread_create(&tinfo[tnum].thread_id, NULL, &dashboard_thread, &tinfo[tnum]);
+    if (s != 0) {handle_error_en(s, "pthread_create");}
+  }
+
+  // Wait for all the threads to finish
+  for (int tnum = 0; tnum < 4; tnum++) {
+    int s = pthread_join(tinfo[tnum].thread_id, NULL);
+    if (s != 0) {handle_error_en(s, "pthread_join");}
+    CheckFakeWork();
+  }
+  fprintf(stderr, "All threads finished\n");
+
+  return 0;
+}
+
+
+
+
+
diff --git a/book-user-code/mystery27a.cc b/book-user-code/mystery27a.cc
new file mode 100644
index 000000000000..5a9630e812db
--- /dev/null
+++ b/book-user-code/mystery27a.cc
@@ -0,0 +1,464 @@
+// Little program to use locks
+// Copyright 2021 Richard L. Sites
+//
+//
+// We would like to run for a few hundred msec total, with perhaps 1000-10000 work
+// calls and 10-ish debug calls so maybe wait 20 msec, total run 250-300 msec
+// 3 threads doing updates each doing about 3000 calls in 200 msec = 70 usec each
+// complain if over 100 usec
+// debug to use at least 200 usec and 1 msec better
+//
+// compile with g++ -O2 -pthread mystery27a.cc fancylock2.cc mutex2.cc kutrace_lib.cc dclab_log.cc -o mystery27a
+//
+// Command-line options:
+// -smallwork -nowork control how much fake work is done by worker-threads holding the locks
+// -dash0 -dash1 -dash2 -dash3 control which locking style is used by dashboard threads
+
+// NOTE(review): the header names on the bare #include lines below are
+// missing in this copy; only the two identified by their trailing comments
+// have been filled in. Restore the rest from the original file.
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include <time.h>  // nanosleep
+#include <unistd.h>  // read()
+//#include
+//#include /* superset of previous */
+//#include
+//#include
+//#include
+
+#include
+
+#include "basetypes.h"
+#include "dclab_log.h"  // for GetUsec()
+#include "fancylock2.h"
+#include "kutrace_lib.h"
+#include "mutex2.h"
+#include "polynomial.h"
+#include "timecounters.h"
+
+using std::string;
+
+// Report the pthread error number en via perror(msg) and exit with failure
+#define handle_error_en(en, msg) \
+  do { errno = en; perror(msg); exit(EXIT_FAILURE); } while (0)
+
+static const int MAX_ACCOUNTS = 100;
+
+static const int EXTRA_DASHBOARD_USEC = 500;
+static const int FAKEWORK_ITER = 140;
+static const int WORKER_ITER = 10000;
+static const int DASHBOARD_ITER = 50;
+
+
+enum ActionType {
+  Deposit = 0,
+  Getcash,
+  Debitcard,
+  Balance
+};
+
+// Indexed by ActionType: display name, and kutrace start/end mark labels
+static const char* kActionName[4] = {"Deposit", "Getcash", "Debitcard", "Balance"};
+static const char* kActionNameStart[4] = {"depo", "cash", "debit", "bal"};
+static const char* kActionNameEnd[4] = {"/depo", "/cash", "/debit", "/bal"};
+
+// Indexed by four random bits: 1/16 Deposit, 2/16 Getcash, 3/16 Debitcard, 10/16 Balance
+static const ActionType kActionFreq[16] = {
+  Deposit, Getcash, Getcash, Debitcard,
+  Debitcard, Debitcard, Balance, Balance,
+  Balance, Balance, Balance, Balance,
+  Balance, Balance, Balance, Balance,
+};
+
+static const bool debugging = true;
+
+typedef struct {
+  int incr_count;
+  int decr_count;
+  double balance;
+  double other;
+} Xdata;
+
+typedef struct {
+  int type;
+  int account;
+  double amount;
+  int fake_work_usec;
+  int pad;
+} Action;
+
+// The shared database
+typedef struct {
+  FancyLock2* readerlock[4];  // Four locks, for subscripting by account# low bits
+  //FancyLock2* writerlock[2];
+  Xdata bankbalance;
+  Xdata accounts[MAX_ACCOUNTS];
+} Database;
+
+typedef struct { /* Used as argument to created threads */
+  pthread_t thread_id; /* ID returned by pthread_create() */
+  int thread_num; /* Application-defined thread # */
+  Database* db; /* Shared data */
+} ThreadInfo;
+
+typedef void DashProc(int whoami, Database* db);
+
+//
+// ---- Globals
+//
+
+// Readers here are mutually exclusive but quick
+// We expect the reader lock to take no more than 50 usec to acquire, 90% of the time
+DEFINE_FANCYLOCK2(global_readerlock, 50);
+
+// Writers are mutually exclusive and may take a while
+// We expect the writer lock to take no more than 100 usec to acquire, 90% of the time
+//DEFINE_FANCYLOCK2(global_writerlock, 100);
+
+// More locks for experiment in spreading locking around based on low bits
+// of account number.
+DEFINE_FANCYLOCK2(global_readerlock2, 50);
+DEFINE_FANCYLOCK2(global_readerlock3, 50);
+DEFINE_FANCYLOCK2(global_readerlock4, 50);
+//DEFINE_FANCYLOCK2(global_writerlock2, 100);
+
+
+static bool alwaysfalse;  // Assigned in main
+
+// Command-line parameters set these
+static int workshift;  // 0 shift gives 0..255 usec delay
+static int workmul;  // 1 gives WORKER_ITER iterations, 2 gives 2x, etc.
+static bool lockbal;  // If true, take out the lock for balance transactions
+static int lockmod;  // Mask. 0 = single locks, 1 = multiple locks based on low bit of account#, 3=lo 2 bits, etc.
+static bool nocapture;  // If true, reduce lock capture by waiting between acq
+static DashProc* dashproc;  // One of several dashboard procs. default: lock for ~500 usec
+
+// ---- End globals
+
+// Sleep the calling thread for msec milliseconds (returns at once for 0).
+// The result of nanosleep() is ignored, so a sleep cut short by a signal
+// is not resumed.
+void WaitMsec(int msec) {
+  if (msec == 0) {return;}
+  struct timespec req;
+  req.tv_sec = msec / 1000;
+  req.tv_nsec = (msec % 1000) * 1000000;
+  nanosleep(&req, NULL);
+}
+
+
+// Do some low-issue-rate bogus work for approximately N microseconds:
+// FAKEWORK_ITER pairs of dependent floating-point divides per microsecond.
+void DoFakeWork(int usec) {
+  double bogus = alwaysfalse ? 1.0 : 3.0;
+  for (int i = 0; i < usec; ++i) {
+    for (int n = 0; n < FAKEWORK_ITER; ++n) {
+      bogus /= 0.999999999;
+      bogus /= 1.000000001;
+    }
+  }
+  // Never executed; it only makes the result observable so the loop is kept
+  if (alwaysfalse) {printf("%f", bogus);}  // Keep live
+}
+
+// Debug aid: print one action to f, prefixed by the current time
+// (seconds within the minute, then microseconds) and the thread number.
+void dumpaction(FILE* f,int whoami, const Action* action) {
+  int64 now = GetUsec();
+  fprintf(f, "%02lld.%06llds [%d] Action %s(%d) $%5.2f %dus\n",
+          (now / 1000000) % 60, now % 1000000, whoami,
+          kActionName[action->type], action->account, action->amount, action->fake_work_usec);
+}
+
+// Little generator called by multiple threads.
+// each with its own random# base on the stack. 
global pointer to database
+// Advances *rand one step and derives every field of *action from that value:
+//   bits 0..3   action type, via the kActionFreq frequency table
+//   bits 4..    account number, modulo MAX_ACCOUNTS
+//   bits 8..23  amount in cents
+//   bits 24..31 fake work in usec, scaled down by workshift
+void MakeAction(Action* action, uint32* rand) {
+  uint32 x;
+  *rand = x = POLYSHIFT32(*rand);
+  memset(action, 0, sizeof(Action));
+  action->type = kActionFreq[x & 15];
+  action->account = (x >> 4) % MAX_ACCOUNTS;
+  action->amount = ((x >> 8) & 0xFFFF) / 100.0;  // $0.00 to 655.35
+  action->fake_work_usec = (x >> 24) & 0xFF;  // 0 to 255 usec
+  action->fake_work_usec >>= workshift;
+  switch (action->type) {
+  case Deposit:
+    break;
+  case Getcash:
+    // Multiple of $20.00, 2/5 of full range
+    action->amount = - floor(action->amount / 50.00) * 20.00;
+    break;
+  case Debitcard:
+    // Multiple of $1, 1/5 of full range
+    action->amount = - floor(action->amount / 5.00);
+    break;
+  case Balance:
+    // No amount, half as much fake work
+    action->amount = 0.0;
+    action->fake_work_usec >>= 1;
+    break;
+  default:
+    break;
+  }
+}
+
+// Apply amount to one balance record and count it as an increment
+// (amount >= 0) or a decrement. Does no locking of its own.
+// NOTE(review): a withdrawal that floors to zero arrives as -0.0, which
+// compares >= 0.0 and is therefore counted as an increment.
+void Update(Xdata* xdata, double amount) {
+  if (amount >= 0.0) {
+    ++xdata->incr_count;
+  } else {
+    ++xdata->decr_count;
+  }
+  xdata->balance += amount;
+}
+
+// Return the balance of one record. Does no locking of its own.
+double ReadBalance(const Xdata* xdata) {
+  return xdata->balance;
+}
+
+// Carry out one action against the shared database.
+// locknum picks one of the four locks from the account number masked by
+// lockmod (always lock 0 when lockmod is 0).
+// NOTE(review): with a non-zero lockmod the bank total is updated under the
+// per-account lock only, so updates arriving through different locks do not
+// exclude each other.
+void DoAction(int whoami, const Action* action, Database* db) {
+  double balance;
+  int locknum = action->account & lockmod;
+  switch (action->type) {
+  case Deposit:
+  case Getcash:
+  case Debitcard:
+    {
+      // Take out the single lock for this account group
+      // (the separate writer lock is disabled in this variant)
+      Mutex2 lock1(whoami, db->readerlock[locknum]);
+      //Mutex2 lock2(whoami, db->writerlock[locknum]);
+      Update(&db->accounts[action->account], action->amount);
+      Update(&db->bankbalance, action->amount);
+      DoFakeWork(action->fake_work_usec);
+    }
+    // Reduce odds of lock capture by delaying ~10 usec after freeing locks
+    if (nocapture) {DoFakeWork(10);}
+    break;
+  case Balance:
+    if (lockbal) {
+      // Take out just the reader lock
+      Mutex2 lock1(whoami, db->readerlock[locknum]);
+      // Ignore the balance but make it live
+      balance = ReadBalance(&db->accounts[action->account]);
+      if (alwaysfalse) {printf("%f", balance);}
+      DoFakeWork(action->fake_work_usec);
+    } else {
+      // No lock at all
+      // Ignore the balance but make it live
+      balance = ReadBalance(&db->accounts[action->account]);
+      if (alwaysfalse) {printf("%f", balance);}
+      DoFakeWork(action->fake_work_usec);
+    }
+    // Reduce odds of lock capture by delaying ~10 usec after freeing locks
+    if (lockbal && nocapture) {DoFakeWork(10);}
+    break;
+  default:
+    DoFakeWork(action->fake_work_usec);
+    break;
+  }
+}
+
+/***
+dashboard output fake html
+
+
+
+  Account xxxx, incr xxxx, decr xxxx, balance xxxx
+  Account xxxx, incr xxxx, decr xxxx, balance xxxx
+  Bank total    incr xxxx, decr xxxx, balance xxxx
+
+
+
+***/
+
+// Format every account with a non-zero balance, followed by the bank total,
+// into one string, then burn EXTRA_DASHBOARD_USEC of fake work.
+// Takes no locks itself; the caller decides what is held while this runs.
+// NOTE(review): the first and last string literals look truncated in this
+// copy (the markup sketched in the comment above appears to have been
+// lost) -- confirm against the original file.
+string BuildDashboardString(const Database* db) {
+  char buffer[256];
+  string s;
+  s += "\n";
+  s += "Dashboard\n";
+  for (int i = 0; i < MAX_ACCOUNTS; ++i) {
+    if (db->accounts[i].balance != 0.00) {
+      snprintf(buffer, sizeof(buffer), "account %04d deposits %4d, withdrawls %4d, balance %8.2f\n",
+        i, db->accounts[i].incr_count, db->accounts[i].decr_count, db->accounts[i].balance);
+      s += buffer;
+    }
+  }
+  snprintf(buffer, sizeof(buffer), "Bank Total   deposits %4d, withdrawls %4d, balance %8.2f\n",
+    db->bankbalance.incr_count, db->bankbalance.decr_count, db->bankbalance.balance);
+  // Previously the bank-total row was formatted and then dropped
+  s += buffer;
+  s += "\n";
+
+  DoFakeWork(EXTRA_DASHBOARD_USEC);
+  return s;
+}
+
+// Dashboard variant: build (and print, when debugging) with no locks held.
+void NoLockDebugDashboard(int whoami, Database* db) {
+  string s = BuildDashboardString(db);
+  if (debugging) {
+    fprintf(stdout, "%s\n", s.c_str());
+  }
+}
+// Dashboard variant: hold all four locks while building and printing.
+void DoDebugDashboard(int whoami, Database* db) {
+  // Take out all locks
+  Mutex2 lock1(whoami, db->readerlock[0]);
+  Mutex2 lock2(whoami, db->readerlock[1]);
+  Mutex2 lock3(whoami, db->readerlock[2]);
+  Mutex2 lock4(whoami, db->readerlock[3]);
+  //Mutex2 lock3(whoami, db->writerlock[0]);
+  //Mutex2 lock4(whoami, db->writerlock[1]);
+  string s = BuildDashboardString(db);
+  if (debugging) {
+    fprintf(stdout, "%s\n", s.c_str());
+  }
+}
+
+// Dashboard variant: as DoDebugDashboard, but return before touching any
+// lock when debugging is off.
+void BetterDebugDashboard(int whoami, Database* db) {
+  if (!debugging) {return;}
+
+  {
+    // Take out all locks
+    Mutex2 lock1(whoami, db->readerlock[0]);
+    Mutex2 lock2(whoami, db->readerlock[1]);
+    Mutex2 lock3(whoami, db->readerlock[2]);
+    Mutex2 lock4(whoami, db->readerlock[3]);
+
+    //Mutex2 lock3(whoami, db->writerlock[0]);
+    //Mutex2 lock4(whoami, db->writerlock[1]);
+    string s = BuildDashboardString(db);
+    fprintf(stdout, "%s\n", s.c_str());
+  }
+}
+
+// Dashboard variant: hold the locks only while copying the database, then
+// build and print from the private copy with nothing held.
+void EvenBetterDebugDashboard(int whoami, Database* db) {
+  if (!debugging) {return;}
+  Database db_copy;
+
+  kutrace::mark_a("copy");
+  {
+    // Take out all locks
+    Mutex2 lock1(whoami, db->readerlock[0]);
+    Mutex2 lock2(whoami, db->readerlock[1]);
+    Mutex2 lock3(whoami, db->readerlock[2]);
+    Mutex2 lock4(whoami, db->readerlock[3]);
+    //Mutex2 lock3(whoami, db->writerlock[0]);
+    //Mutex2 lock4(whoami, db->writerlock[1]);
+    db_copy = *db;
+    // Free all four locks on block exit
+  }
+  kutrace::mark_a("/copy");
+
+  string s = BuildDashboardString(&db_copy);
+  fprintf(stdout, "%s\n", s.c_str());
+}
+
+
+// Point the database at the four global locks and zero every balance record.
+void DbInit(Database* db) {
+  db->readerlock[0] = &global_readerlock;  // Multiple reader locks
+  db->readerlock[1] = &global_readerlock2;
+  db->readerlock[2] = &global_readerlock3;
+  db->readerlock[3] = &global_readerlock4;
+  //db->writerlock[0] = &global_writerlock;
+  //db->writerlock[1] = &global_writerlock2;
+  memset(&db->bankbalance, 0, sizeof(Xdata));
+  memset(&db->accounts[0], 0, MAX_ACCOUNTS * sizeof(Xdata));
+}
+
+// Thread body: generate and perform WORKER_ITER * workmul actions, each
+// bracketed by kutrace start/end marks. arg is this thread's ThreadInfo.
+// Every worker starts its generator from the same POLYINIT32 value.
+void* worker_thread(void* arg) {
+  ThreadInfo* tinfo = (ThreadInfo*)(arg);
+  int whoami = tinfo->thread_num;
+  fprintf(stdout, "\nWorker thread %d started\n", whoami);
+  Action action;
+  uint32 rand = POLYINIT32;
+  int count = WORKER_ITER * workmul;
+  for (int i = 0; i < count; ++i) {
+    MakeAction(&action, &rand);
+////dumpaction(stdout, whoami, &action);
+    kutrace::mark_a(kActionNameStart[action.type]);
+    DoAction(whoami, &action, tinfo->db);
+    kutrace::mark_a(kActionNameEnd[action.type]);
+    // Indicate progress
+    if (((i + 1) % 1000) == 0) {fprintf(stderr, "worker[%d] %4d\n", whoami, i+1);}
+  }
+  fprintf(stdout, "\nWorker thread %d finished\n", whoami);
+  return NULL;
+}
+
+// Thread body: DASHBOARD_ITER times, wait 20 msec and then run the dashboard
+// procedure selected on the command line. arg is this thread's ThreadInfo.
+void* dashboard_thread(void* arg) {
+  ThreadInfo* tinfo = (ThreadInfo*)(arg);
+  int whoami = tinfo->thread_num;
+  fprintf(stdout, "\nDashboard thread %d started\n", whoami);
+  for (int i = 0; i < DASHBOARD_ITER; ++i) {
+    WaitMsec(20);
+    (*dashproc)(whoami, tinfo->db);
+    // Indicate progress
+    if (((i + 1) % 10) == 0) {fprintf(stderr, "dashboard[%d] %4d\n", whoami, i+1);}
+  }
+  fprintf(stdout, "\nDashboard thread %d finished\n", whoami);
+  return NULL;
+}
+
+// Use this to see if usec delay is in the ballpark.
+// Early use might happen with a slow CPU clock, so also try later after CPUs are warmed up
+// Times DoFakeWork(1000) with GetUsec() and prints the elapsed usec to stdout.
+void CheckFakeWork() {
+  int64 start = GetUsec();
+  DoFakeWork(1000);
+  int64 elapsed = GetUsec() - start;
+  fprintf(stdout,"DoFakeWork(1000) took %lld usec\n", elapsed);
+}
+
+// Print the command-line synopsis to stderr and terminate the program.
+// Only reached for a malformed command line, so report failure to the caller.
+void Usage() {
+  fprintf(stderr, "Usage: mystery27a [-smallwork | -nowork] [-nolockbal] [-multilock] [-nocapture] [-dash0 | -dash1 | -dash2 | -dash3]\n");
+  exit(EXIT_FAILURE);
+}
+
+// Parse the options, start three worker threads and one dashboard thread
+// on a shared database, and wait for all four to finish.
+int main(int argc, const char** argv) {
+  alwaysfalse = (time(NULL) == 0);  // Never true but the compiler doesn't know that
+  Database db;
+  DbInit(&db);
+  CheckFakeWork();
+
+  // Command-line parameters, if any
+  workshift = 0;  // 0..255 usec delay
+  workmul = 1;
+  lockbal = true;
+  lockmod = 0;
+  nocapture = false;
+  dashproc = &DoDebugDashboard;
+  for (int i = 1; i < argc; ++i) {
+    if (argv[i][0] != '-') {Usage();}
+    if (strcmp(argv[i], "-smallwork") == 0) {workshift = 3; workmul = 1;}  // 0..15 usec delay
+    else if (strcmp(argv[i], "-nowork") == 0) {workshift = 8; workmul = 2;}  // 0 usec delay
+    else if (strcmp(argv[i], "-nolockbal") == 0) {lockbal = false;}  // No lock for balance transactions
+    else if (strcmp(argv[i], "-multilock") == 0) {lockmod = 3;}  // Use multiple reader locks
+    else if (strcmp(argv[i], "-nocapture") == 0) {nocapture = true;}  // Delay before re-acquire of locks
+    else if (strcmp(argv[i], "-dash0") == 0) {dashproc = &NoLockDebugDashboard;}  // No dashboard locks at all
+    else if (strcmp(argv[i], "-dash1") == 0) {dashproc = &DoDebugDashboard;}  // Long lock
+    else if (strcmp(argv[i], "-dash2") == 0) {dashproc = &BetterDebugDashboard;}  // Early out long lock
+    else if (strcmp(argv[i], "-dash3") == 0) {dashproc = &EvenBetterDebugDashboard;}  // Lock just copying
+    else {Usage();}
+  }
+
+  // Launch several worker threads that update some shared data
+  ThreadInfo tinfo[4];
+  for (int tnum = 0; tnum < 3; tnum++) {
+    tinfo[tnum].thread_num = tnum;
+    tinfo[tnum].db = &db;
+    int s = pthread_create(&tinfo[tnum].thread_id, NULL, &worker_thread, &tinfo[tnum]);
+    if (s != 0) {handle_error_en(s, "pthread_create");}
+  }
+
+  // Launch a dashboard thread that reads the shared data
+  for (int tnum = 3; tnum < 4; tnum++) {
+    tinfo[tnum].thread_num = tnum;
+    tinfo[tnum].db = &db;
+    int s = pthread_create(&tinfo[tnum].thread_id, NULL, &dashboard_thread, &tinfo[tnum]);
+    if (s != 0) {handle_error_en(s, "pthread_create");}
+  }
+
+  // Wait for all the threads to finish
+  for (int tnum = 0; tnum < 4; tnum++) {
+    int s = pthread_join(tinfo[tnum].thread_id, NULL);
+    if (s != 0) {handle_error_en(s, "pthread_join");}
+    CheckFakeWork();
+  }
+  fprintf(stderr, "All threads finished\n");
+
+  return 0;
+}
+
+
+
+
+
diff --git a/book-user-code/mystery3.cc b/book-user-code/mystery3.cc
new file mode 100644
index 000000000000..f979be410d97
--- /dev/null
+++ b/book-user-code/mystery3.cc
@@ -0,0 +1,471 @@
+// Little program to time disk transfers
+// Copyright 2021 Richard L. Sites
+
+// Usage: mystery3
+// Compile with
+//   g++ -O2 mystery3.cc -lrt -o mystery3_opt
+// The -lrt flag option is required to use async i/o
+// Using g++ instead of gcc because we are using C++ strings
+
+#include <aio.h>  // Async I/O. 
MUST LINK WITH -lrt
+// NOTE(review): the header names on the bare #include lines below are
+// missing in this copy; only the one identified by its trailing comment has
+// been filled in. Restore the rest from the original file.
+#include
+#include
+#include
+#include
+#include
+#include
+#include <sys/time.h>  // gettimeofday
+#include
+#include
+#include
+
+#include
+
+#include "basetypes.h"
+#include "polynomial.h"
+#include "timecounters.h"
+
+using std::string;
+
+static const int kPageSize = 4096;  // Must be a power of two
+static const int kPageSizeMask = kPageSize - 1;
+
+// Make an array bigger than any expected cache size
+static const int kMaxArraySize = 40 * 1024 * 1024;
+
+
+// Order-of-magnitude times:
+//  One disk revolution at 7200 RPM = 8.33 msec
+//  One disk revolution at 5400 RPM = 11.11 msec
+//  If transfer rate is ~100 MB/sec, a track is ~1MB long, or 256 4KB blocks
+//  Time to transfer a single 4KB block is ~40 usec
+//  Seek time for big seek is perhaps 15 msec
+//  Seek time track-to-track is perhaps 5 msec
+
+// Allocate a byte array of given size, aligned on a page boundary
+// Over-allocates by kPageSizeMask bytes and rounds the address up.
+// *rawptr receives the unaligned malloc result; caller will call free(rawptr)
+// If malloc fails, both *rawptr and the return value are NULL.
+// NOTE(review): the cast target types were missing in this copy and are
+// restored here from the declared types of the variables involved.
+uint8* AllocPageAligned(int bytesize, uint8** rawptr) {
+  int newsize = bytesize + kPageSizeMask;
+  *rawptr = reinterpret_cast<uint8*>(malloc(newsize));
+  uintptr_t temp = reinterpret_cast<uintptr_t>(*rawptr);
+  uintptr_t temp2 = (temp + kPageSizeMask) & ~kPageSizeMask;
+  return reinterpret_cast<uint8*>(temp2);
+}
+
+// Zero a byte array
+void ZeroAll(uint8* ptr, int bytesize) {
+  memset(ptr, 0, bytesize);
+}
+
+// Fill byte array with non-zero pseudo-random bits
+// Written one 32-bit word at a time; up to 3 trailing bytes are left untouched
+void PseudoAll(uint8* ptr, int bytesize) {
+  uint32* wordptr = reinterpret_cast<uint32*>(ptr);
+  int wordcount = bytesize >> 2;
+  uint32 x = POLYINIT32;
+  for (int i = 0; i < wordcount; ++i) {
+    *wordptr++ = x;
+    x = POLYSHIFT32(x);
+  }
+}
+
+// Map all usec times into single character
+//  negative        to -
+//  0..9 usec       to .
+//  10..99 usec     to b..j
+//  100..999 usec   to 1..9
+//  1..24.999 msec  to B..Y
+//  25 msec and up  to +
+char OneChar(int64 usec) {
+  if (usec < 0) {return '-';}
+  if (usec < 10) {return '.';}
+  if (usec < 100) {return (usec / 10) + 'a';}
+  if (usec < 1000) {return (usec / 100) + '0';}
+  if (usec < 25000) {return (usec / 1000) + 'A';}
+  return '+';
+}
+
+// Write the key for the OneChar() symbols to f, headed by label
+void PrintLegend(FILE* f, const char* label) {
+  // ASCII art delta-times
+  fprintf(f, "%s: 4KB block delta times in usec\n", label);
+  fprintf(f, " - negative delta-time\n");
+  fprintf(f, " . <10us delta-time\n");
+  fprintf(f, " b<20 c<30 d<40 e<50 f<60 g<70 h<80 i<90 j<100us\n");
+  fprintf(f, " 1<200 2<300 3<400 4<500 5<600 6<700 7<800 8<900 9<1000us\n");
+  fprintf(f, " B<2 C<3 D<4 E<5 F<6 G<7 H<8 I<9 J<10.. Y<25ms\n");
+  fprintf(f, " + >=25ms delta-time\n");
+  fprintf(f, "\n");
+}
+
+
+// Print out the delta times in usec
+void PrintTimes(const char* fname, const char* label, const int64* usecperblock, int numblocks) {
+  FILE* f = fopen(fname, "w");
+  if (f == NULL) {return;}
+  // Print ASCII art of the delta times
+  PrintLegend(f, label);
+  int runcount = 0;
+  int64 runtime = 0;
+  int64 currenttime = 0;
+  for (int i = 0; i < numblocks; ++i) {
+    if ((i & 255) == 255) {fprintf(f, " [%dMB]", (i + 1) / 256);}
+    if ((i & 63) == 63) {fprintf(f, "\n");}
+
+    // Negative or large times (greater than 1000 usec) finish the old run
+    if ((usecperblock[i] < 0) || (usecperblock[i] > 1000)) {
+      if (runtime > 0) {
+        fprintf(f, "\n = %d blocks %5.3fms %4.2fMB/s\n",
+                runcount, runtime / 1000.0, (runcount * 4096.0) / runtime);
+      }
+    }
+
+    currenttime += usecperblock[i];
+
+    // Negative or large times start a new run, do not contribute to total time
+    if ((usecperblock[i] < 0) || (usecperblock[i] > 1000)) {
+      runcount = 0;
+      runtime = 0;
+      fprintf(f, "(%+5.3fms) @ %5.3fms\n", usecperblock[i] / 1000.0, currenttime / 1000.0);
+    } else {
+      runtime += usecperblock[i];
+    }
+
+    fprintf(f, "%c", OneChar(usecperblock[i]));
+    ++runcount;
+  }
+
+  if (runtime > 0) {
+    fprintf(f, "\n = %d blocks %5.3fms %4.2fMB/s\n",
+            runcount, runtime / 1000.0, (runcount * 4096.0) / runtime);
+  }
+  fprintf(f, "\n");
+
+  // Print raw delta-times
+  for (int i = 0; i < numblocks; ++i) {
+    fprintf(f, "%3lld ", usecperblock[i]);
+    if ((i & 255) == 255) {fprintf(f, " [%dMB]", (i + 1) / 256);}
+    if ((i & 15) == 15) {fprintf(f, "\n");}
+  }
+ fclose(f); +} + +// Print out the delta times in usec, suitable for JavaScript import +void PrintJSON(const char* fname, const char* label, const int64* usecperblock, int numblocks) { + FILE* f = fopen(fname, "w"); + if (f == NULL) {return;} + + bool reading = (strstr(label, "ead") != NULL); + + fprintf(f, " {\n"); + fprintf(f, " \"axisLabelX\" : \"Time (sec)\",\n"); + fprintf(f, " \"axisLabelY\" : \"Offset in file (blocks)\",\n"); + fprintf(f, " \"dotColor\" : \"%s\",\n", reading ? "blue" : "red"); + fprintf(f, " \"dotR\" : 3,\n"); + fprintf(f, " \"shortUnitsX\" : \"s\",\n"); + fprintf(f, " \"shortUnitsY\" : \"B\",\n"); + fprintf(f, " \"shortMulX\" : 1,\n"); + fprintf(f, " \"shortMulY\" : 4096,\n"); + fprintf(f, " \"thousandsX\" : 1000,\n"); + fprintf(f, " \"thousandsY\" : 1024,\n"); + fprintf(f, " \"title\" : \"Disk/SSD %s 4KB blocks vs. time\",\n", label); + fprintf(f, " \"points\" : [\n"); + + // Raw current times (w.r.t. start time) + int64 currenttime = 0; + for (int i = 0; i < numblocks; ++i) { + currenttime += usecperblock[i]; + fprintf(f, "[%8.6f, %5d],\n", currenttime / 1000000.0, i); + } + fprintf(f, "[999.000000, 0]\n"); // End marker; No comma + fprintf(f, "]}\n"); + + fclose(f); +} + + +// Timing disk reads +// Strategy: +// Write a file of pseudo-random data that is bigger than any expected +// on-disk track buffer. +// Do an asynchronous read into an all-zero buffer. +// While that is happening, scan the beginning of each 4KB block in the +// buffer looking for a change from zero to non-zero. +// Record the time at each change. +// After the read completes, return the delta-time for each block. 
+void TimeDiskRead(uint8* ptr, int bytesize, const char* filename, + int64* usecperblock, int numblocks) { + // Zero the array of block times + memset(usecperblock, 0, numblocks * sizeof(int64)); + + // Set buffer to pseudo-random non-zero bytes + PseudoAll(ptr, bytesize); + + // Open target file + fprintf(stdout, "TimeDiskRead opening %s for write\n", filename); + int fd = open(filename, O_WRONLY | O_CREAT, S_IRWXU); + if (fd < 0) {perror("TimeDiskRead write open"); return;} + + // Write it + ssize_t byteswritten = write(fd, ptr, bytesize); + if (byteswritten < 0) {perror("TimeDiskRead write"); return;} + close(fd); + + // Set buffer to zero bytes + ZeroAll(ptr, bytesize); + + // Prepare for asynchronous read + fprintf(stdout, "TimeDiskRead opening %s for read of %dKB\n", filename, numblocks * 4); + fd = open(filename, O_RDONLY | O_DIRECT | O_NOATIME | O_ASYNC); + if (fd < 0) {perror("TimeDiskRead read open"); return;} + + struct aiocb aiocbp; + memset(&aiocbp, 0, sizeof(aiocb)); + aiocbp.aio_fildes = fd; + aiocbp.aio_offset = 0; + aiocbp.aio_buf = ptr; + aiocbp.aio_nbytes = bytesize; + aiocbp.aio_reqprio = 0; + aiocbp.aio_sigevent.sigev_notify = SIGEV_NONE; + //aiocbp.aio_lio_opcode = LIO_NOP; + + // It is quite possible at this point that the write to disk is still happening. + // In that case, the lowest time we see after startusec might be much larger than + // one seek time. Doing a sync here helps separate the write time from the + // upcoming read time + syncfs(fd); + + // Start timer and the read + int64 startusec, stopusec; + startusec = GetUsec(); + + int ok = aio_read(&aiocbp); + if (ok < 0) {perror("TimeDiskRead aio_read"); return;} + + // Async read is now happening + + // Scanning the buffer for non-zero values may take longer than the time to read + // a single disk block. Scanning is good because it discovers blocks arriving + // in an arbitrary unexpected order. 
But to give better time resolution, we + // look every time at the next-sequential block also. + int expected_i = 0; // The 4KB block number we expect to be read next + uint8* expected_ptr4kb = ptr; // Its first word + int scancount = 0; + int changecount = 0; + int ptr4kb_incr = kPageSize / sizeof(uint32); + while(aio_error(&aiocbp) == EINPROGRESS) { + // Read is still going on + uint8* ptr4kb = ptr; + int64 timeusec = GetUsec(); + for (int i = 0; i < numblocks; ++i) { + if ((i & 255) == 0) {timeusec = GetUsec();} + // Scan for new non-zero values + if ((usecperblock[expected_i] == 0) && + (*reinterpret_cast(expected_ptr4kb) != 0)) { + // We just saw a change + usecperblock[expected_i] = timeusec; + ++changecount; + expected_i = expected_i + 1; // Expect next sequential block + expected_ptr4kb = expected_ptr4kb + kPageSize; + } + if ((usecperblock[i] == 0) && + (*reinterpret_cast(ptr4kb) != 0)) { + // We just saw a change + usecperblock[i] = timeusec; + ++changecount; + expected_i = i + 1; // Expect next sequential block + expected_ptr4kb = ptr4kb + kPageSize; + } + ptr4kb += kPageSize; // Next 4KB block + } + ++scancount; + } + // Async read is now complete + stopusec = GetUsec(); + double felapsedusec = stopusec - startusec; + + // Fill in any missed times + for (int i = 0; i < numblocks; ++i) { + if (usecperblock[i] == 0) {usecperblock[i] = stopusec;} + } + + + fprintf(stdout, "Async read startusec %lld, stopusec %lld, delta %lld\n", + startusec, stopusec, stopusec - startusec); + + fprintf(stdout, "scancount %d, changecount inside scan %d\n", + scancount, changecount); + fprintf(stdout, " %5.3fMB/sec overall\n\n", + bytesize / felapsedusec); + + ssize_t bytesread = aio_return(&aiocbp); + if (bytesread < 0) {perror("TimeDiskRead aio_read"); return;} + close(fd); + + // Put delta times into return array + int64 priorchangetime = startusec; + for (int i = 0; i < numblocks; ++i) { + int64 temp = usecperblock[i]; + usecperblock[i] = usecperblock[i] - 
priorchangetime; // This can be negative! + priorchangetime = temp; + } +} + + +// Timing disk writes +// Strategy: +// Write a buffer of pseudo-random data that is bigger than any expected +// on-disk track buffer. +// Do an asynchronous write to disk. +// While that is happening, scan the beginning of each 4KB block in the +// buffer writing the time. +// After the write completes, read back the data to see what times got to disk. +// Return the delta-time for each block. +void TimeDiskWrite(uint8* ptr, int bytesize, const char* filename, + int64* usecperblock, int numblocks) { + fprintf(stderr, "TimeDiskWrite to be completed\n"); + + // Zero the array of block times + memset(usecperblock, 0, numblocks * sizeof(int64)); + + // Set buffer to pseudo-random non-zero bytes + PseudoAll(ptr, bytesize); + + // Set the times at the front of each 4KB block all to zero + uint8* ptr4kb = ptr; // Its first 4KB block + for (int i = 0; i < numblocks; ++i) { + *reinterpret_cast(ptr4kb) = 0; + ptr4kb += kPageSize; + } + + // Prepare for asynchronous write + fprintf(stdout, "TimeDiskWrite opening %s for async write of %dKB\n", + filename, numblocks * 4); + int fd = open(filename, O_WRONLY | O_CREAT | O_DIRECT | O_NOATIME | O_ASYNC, S_IRWXU); + if (fd < 0) {perror("TimeDiskWrite write open"); return;} + + struct aiocb aiocbp; + memset(&aiocbp, 0, sizeof(aiocb)); + aiocbp.aio_fildes = fd; + aiocbp.aio_offset = 0; + aiocbp.aio_buf = ptr; + aiocbp.aio_nbytes = bytesize; + aiocbp.aio_reqprio = 0; + aiocbp.aio_sigevent.sigev_notify = SIGEV_NONE; + //aiocbp.aio_lio_opcode = LIO_NOP; + + // It is quite possible at this point that the open is still happening. + // In that case, the lowest time we see after startusec might be much larger than + // one seek time. 
Doing a sync here helps separate the open time from the + // upcoming write time + syncfs(fd); + + // Start timer and the write + int64 startusec, stopusec; + startusec = GetUsec(); + + int ok = aio_write(&aiocbp); + if (ok < 0) {perror("TimeDiskWrite aio_write"); return;} + + // Async write is now happening + + while(aio_error(&aiocbp) == EINPROGRESS) { + // Write is still going on + // Repeatedlly put current time into the front of each 4KB block + // (No shortcuts available to get better time resolution) + +//// +//// You get to fill in this part !! +//// + + } + // Async write is now complete + stopusec = GetUsec(); + double felapsedusec = stopusec - startusec; + + fprintf(stdout, "Async write startusec %lld, stopusec %lld, delta %lld\n", + startusec, stopusec, stopusec - startusec); + fprintf(stdout, " %5.3fMB/sec overall\n\n", + bytesize / felapsedusec); + + ssize_t byteswritten = aio_return(&aiocbp); + if (byteswritten < 0) {perror("TimeDiskWrite aio_write"); return;} + close(fd); + + // Now read back the file and see what times went out + // Open target file + fprintf(stdout, "TimeDiskWrite opening %s for read\n", filename); + fd = open(filename, O_RDONLY); + if (fd < 0) {perror("TimeDiskWrite read open"); return;} + + // Zero the buffer for cleanliness in case anything went wrong + ZeroAll(ptr, bytesize); + + // Read it + ssize_t bytesread = read(fd, ptr, bytesize); + if (bytesread < 0) {perror("TimeDiskWrite read"); return;} + close(fd); + + // Extract raw times from front of each 4KB block and put in return array + ptr4kb = ptr; // Its first 4KB block + for (int i = 0; i < numblocks; ++i) { + usecperblock[i] = *reinterpret_cast(ptr4kb); + ptr4kb += kPageSize; + } + // Fill in any missed times + for (int i = 0; i < numblocks; ++i) { + if (usecperblock[i] == 0) {usecperblock[i] = startusec;} + } + + // Put delta times into return array + int64 priorchangetime = startusec; + for (int i = 0; i < numblocks; ++i) { + int64 temp = usecperblock[i]; + 
usecperblock[i] = usecperblock[i] - priorchangetime; // This can be negative! + priorchangetime = temp; + } +} + +string StripSuffix(const char* fname) { + string str = string(fname); + size_t period = str.find_last_of('.'); + if (period == string::npos) {return str;} + return str.substr(0, period); +} + + +void Usage() { + fprintf(stderr, "Usage: mystery3 \n"); +} + +int main (int argc, const char** argv) { + if (argc < 2) {Usage(); return 0;} + + const char* filename = argv[1]; + + // Allocate a 40MB array aligned on a 4KB boundary + uint8* rawptr; + uint8* ptr = AllocPageAligned(kMaxArraySize, &rawptr); + + // Allocate usec counts per 4KB disk block. Signed to allow negative deltas + int numblocks = kMaxArraySize / kPageSize; + int64* usecperblock = new int64[numblocks]; + memset(usecperblock, 0, numblocks * sizeof(int64)); + + TimeDiskRead(ptr, kMaxArraySize, filename, usecperblock, numblocks); + string rtime_fname = StripSuffix(filename) + "_read_times.txt"; + string rjson_fname = StripSuffix(filename) + "_read_times.json"; + PrintTimes(rtime_fname.c_str(), "Read", usecperblock, numblocks); + PrintJSON(rjson_fname.c_str(), "Read", usecperblock, numblocks); + fprintf(stderr, "%s and %s written\n", rtime_fname.c_str(), rjson_fname.c_str()); + + TimeDiskWrite(ptr, kMaxArraySize, filename, usecperblock, numblocks); + string wtime_fname = StripSuffix(filename) + "_write_times.txt"; + string wjson_fname = StripSuffix(filename) + "_write_times.json"; + fprintf(stderr, "%s and %s written\n", wtime_fname.c_str(), wjson_fname.c_str()); + PrintTimes(wtime_fname.c_str(), "Write", usecperblock, numblocks); + PrintJSON(wjson_fname.c_str(), "Write", usecperblock, numblocks); + + delete[] usecperblock; + free(rawptr); + return 0; +} + + + + diff --git a/book-user-code/paging_hog.cc b/book-user-code/paging_hog.cc new file mode 100644 index 000000000000..7ad5c664210a --- /dev/null +++ b/book-user-code/paging_hog.cc @@ -0,0 +1,65 @@ +// Little program to fil up memory and take 
page faults +// Copyright 2021 Richard L. Sites +// +// compile with g++ -O2 paging_hog.cc kutrace_lib.cc -o paging_hog + +#include "stdlib.h" +#include "stdio.h" + +#include "basetypes.h" +#include "kutrace_lib.h" +#include "polynomial.h" + +//static const int64 kMAX_MB = 1000; // 1 GB +//static const int64 kMAX_MB = 5000; // 5 GB +static const int64 kMAX_MB = 8000; // 8 GB +//static const int64 kMAX_MB = 10000; // 10 GB + +static const int64 k40_MB = 40 * 1024 * 1024; + +int main (int argc, const char** argv) { + // Make an array to hold all the pointers + int64 chunkcount = (kMAX_MB << 20) / k40_MB; + char** chunks = (char**)malloc(chunkcount * sizeof(char*)); + + // Allocate chunks of 40MB until malloc fails, then back off one + fprintf(stdout, "Allocating up to %lld MB in %lld 40MB chunks\n", kMAX_MB, chunkcount); + uint32 x = POLYINIT32; + int64 chunklimit = 0; + for (int64 i = 0; i < chunkcount; ++i) { + chunklimit = i; + chunks[i] = (char*)malloc(k40_MB); + if (chunks[i] == NULL) { + // Allocation failed + fprintf(stdout, "Allocation of chunk %lld failed\n", i); + if (i == 0) {return 0;} // No room at all + // Make a little room by freeing the last successful 40MB + free(chunks[i - 1]); + chunklimit = i - 1; + break; + } + // We got a chunk. Write to each page so they are not the single all-zero page. + // We only need to touch one byte of each page to force a real allocation. 
+ fprintf(stdout, "."); + if ((i % 25) == 24) {fprintf(stdout, "\n");} + kutrace::mark_d(i); + char* ptr = chunks[i]; + for (int k = 0; k < k40_MB; k += (1 << 12)) { + ptr[k] = (char)x; + x = POLYSHIFT32(x); + } + } + + // Scan the allocated area, creating ~1M page faults to/from disk + fprintf(stdout, "Scanning %lld 40MB chunks\n", chunklimit); + for (int64 i = 0; i < chunklimit; ++i) { + char* ptr = chunks[i]; + for (int k = 0; k < k40_MB; k += (1 << 12)) { + ptr[k] = (char)x; + x = POLYSHIFT32(x); + } + } + + return 0; +} + diff --git a/book-user-code/paging_hog_scan.cc b/book-user-code/paging_hog_scan.cc new file mode 100644 index 000000000000..09fe7a541477 --- /dev/null +++ b/book-user-code/paging_hog_scan.cc @@ -0,0 +1,73 @@ +// Little program to fil up memory and take page faults +// Copyright 2021 Richard L. Sites +// +// compile with g++ -O2 paging_hog.cc kutrace_lib.cc -o paging_hog + +#include "stdlib.h" +#include "stdio.h" + +#include "basetypes.h" +#include "kutrace_lib.h" +#include "polynomial.h" + +// We want this to be just big enough to start paging to disk without running forever +// Very dependant on CPU memory and what else is running +static const int64 kMAX_MB = 7500; // 7.5 GB + + +static const int64 k40_MB = 40 * 1024 * 1024; + +int main (int argc, const char** argv) { + int64z max_mb = kMAX_MB; + // Optional parameter gives max size in MB + if (1 < argc) { + max_mb = atoi(argv[1]); + } + // Make an array to hold all the pointers + int64 chunkcount = (max_mb << 20) / k40_MB; + char** chunks = (char**)malloc(chunkcount * sizeof(char*)); + + // Allocate chunks of 40MB until malloc fails, then back off one + fprintf(stdout, "Allocating up to %lld MB in %lld 40MB chunks\n", max_mb, chunkcount); + uint32 x = POLYINIT32; + int64 chunklimit = 0; + for (int64 i = 0; i < chunkcount; ++i) { + chunklimit = i; + chunks[i] = (char*)malloc(k40_MB); + if (chunks[i] == NULL) { + // Allocation failed + fprintf(stdout, "Allocation of chunk %lld failed\n", i); 
+ if (i == 0) {return 0;} // No room at all + // Make a little room by freeing the last successful 40MB + free(chunks[i - 1]); + chunklimit = i - 1; + break; + } + // We got a chunk. Write to each page so they are not the single all-zero page. + // We only need to touch one byte of each page to force a real allocation. + fprintf(stdout, "."); + if ((i % 25) == 24) {fprintf(stdout, "\n");} + kutrace::mark_d(i); + char* ptr = chunks[i]; + for (int k = 0; k < k40_MB; k += (1 << 12)) { + ptr[k] = (char)x; + x = POLYSHIFT32(x); + } + } + + // Scan the allocated area, creating ~1M page faults to/from disk + fprintf(stdout, "Scanning %lld 40MB chunks\n", chunklimit); + kutrace::goipc(argv[0]); + for (int64 i = 0; i < chunklimit; ++i) { + char* ptr = chunks[i]; + kutrace::mark_d(i); + for (int k = 0; k < k40_MB; k += (1 << 12)) { + ptr[k] = (char)x; + x = POLYSHIFT32(x); + } + } + kutrace::stop("ku_hog_scan.trace"); + + return 0; +} + diff --git a/book-user-code/pcaptojson.cc b/book-user-code/pcaptojson.cc new file mode 100644 index 000000000000..25d33fea9aae --- /dev/null +++ b/book-user-code/pcaptojson.cc @@ -0,0 +1,176 @@ +// Little program to read tcpdump output file and extract packets with +// dclab headers. Snarf at least 64 bytes for this to work. +// +// TODO: we need to know whether packet is incoming or outgoing, so we +// need to know which machine this trace was taken on. +// +// Copyright 2021 Richard L. 
Sites +// +// Compile with g++ -O2 pcaptojson.cc dclab_log.cc dclab_rpc.cc -lpcap -o pcaptojson + + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "basetypes.h" +#include "dclab_rpc.h" +#include "kutrace_lib.h" + +#define PCAP_BUF_SIZE 1024 +#define PCAP_SRC_FILE 2 + +void dumpbytes(const u_char* d, int len) { + for (int i = 0; i len); +//dumpbytes(packet, 64); + + // Ignore too-short packets eth 14(+2) + ip 20 + tcp 20 + rpc 16 + rpcid 4 = 76 + if (pkthdr->len < 76) {return;} + + // Ignore non-IP packets + // Hack: There are two extra bytes before the ether_type field for some reason + const u_char* ethstart = packet + 2; + ethernetHeader = (struct ether_header*)(ethstart); +//fprintf(stderr, " ether_type %04x\n", ntohs(ethernetHeader->ether_type)); + if (ntohs(ethernetHeader->ether_type) != ETHERTYPE_IP) {return;} + + // Ignore non-TCP packets + const u_char* ipstart = ethstart + sizeof(struct ether_header); + ipHeader = (struct ip*)(ipstart); +//fprintf(stderr, " ip_p %04x\n", ipHeader->ip_p); + if (ipHeader->ip_p != IPPROTO_TCP) {return;} + + // Pick out the source and dest IPv4 addresses + bool rx = true; + uint32 src_ipv4 = *(uint32*)(&ipHeader->ip_src); + uint32 dst_ipv4 = *(uint32*)(&ipHeader->ip_dst); + + if (our_ipv4 == src_ipv4) {rx = false;} + else if (our_ipv4 == dst_ipv4) {rx = true;} + else { + if (first_error) { + fprintf(stderr, "No IP address match. 
We are %08x, src %08x, dst %08x\n", + our_ipv4, src_ipv4, dst_ipv4); + fprintf(stderr, " Ignoring packet\n"); + first_error = false; + return; + } + } + + + const u_char* tcpstart = ipstart + (ipHeader->ip_hl * 4); + tcpHeader = (struct tcphdr*)(tcpstart); + const u_char* datastart = tcpstart + (tcpHeader->doff * 4); +//dumpbytes(datastart, 32); + data = (u_char*)(datastart); + datalength = pkthdr->len - (datastart - packet); +//fprintf(stderr, " datalength %u\n", datalength); + + // Ignore too-short data rpc 16 + rpcid 4 + if (datalength < 24) {return;} + + const RPCMarker* rpcmarker = (const RPCMarker*)(data); + // Ignore packets without our signature word at the front +//fprintf(stderr, " signature %08x %08x %08x %08x\n", +//rpcmarker->signature, rpcmarker->headerlen, rpcmarker->datalen, rpcmarker->checksum); + if (!ValidMarker(rpcmarker)) {return;} + + // We have a valid marker, so a likely dclab message beginning + + // Extract the message length and RPCID low 16 bits + uint32 msg_len = rpcmarker->datalen; + const RPCHeader* rpcheader = (const RPCHeader*)(data + sizeof(RPCMarker)); + uint32 msg_rpcid = rpcheader->rpcid & 0xFFFF; + + // Write a json line + // timestamp is seconds within minute and fraction. Seconds must be three chars. + // This will not line up with KUtrace data if the KUtrace started in a different minute. + // So we record the first time encountered for tcpalign to fix this later. + if (first_time) { + // Put the basetime into the output JSON file. Note leading space. 
+ first_time = false; + basetime = (pkthdr->ts.tv_sec / 60) * 60; // Round down to minute + struct tm* tmbuf = localtime(&pkthdr->ts.tv_sec); + fprintf(stdout, " \"tcpdumpba\" : \"%04d-%02d-%02d_%02d:%02d:00\",\n", + tmbuf->tm_year + 1900, tmbuf->tm_mon + 1, tmbuf->tm_mday, + tmbuf->tm_hour, tmbuf->tm_min); + } + uint32 ts_seconds = pkthdr->ts.tv_sec - basetime; + uint32 ts_usec = pkthdr->ts.tv_usec; + + // [ ts, dur, cpu, pid, rpcid, event, arg, ret, ipc, "name"], + // [ 53.66795600, 0.00000001, 0, 0, rrrr, 516, length, 0, 0, "rpc.rrrr"], + uint32 event = rx ? KUTRACE_RPCIDRXMSG : KUTRACE_RPCIDTXMSG; + fprintf(stdout, "[%3u.%06u00, 0.00000001, 0, 0, %u, %u, %u, 0, 0, \"rpc.%u\"],\n", + ts_seconds, ts_usec, msg_rpcid, event, msg_len, msg_rpcid); +} + +void usage() { + fprintf(stderr, "usage: pcaptojson \n"); + fprintf(stderr, "example pcaptojson server_tcpdump.pcap 192.168.1.61\n"); + exit(-1); +} + +int main(int argc, const char **argv) { + if(argc != 3) { + usage(); + } + const char* filename = argv[1]; + const char* ipstring = argv[2]; + int byte1, byte2, byte3, byte4; + int n = sscanf(ipstring, "%d.%d.%d.%d", &byte1, &byte2, &byte3, &byte4); + if (n != 4) {usage();} + uint32 our_ipv4 = (byte4 << 24) | (byte3 << 16) | (byte2 << 8) | (byte1 << 0); + + pcap_t *fp; + char errbuf[PCAP_ERRBUF_SIZE]; + //char source[PCAP_BUF_SIZE]; + //int i, maxCountSyn = 0, maxCountHttp = 0, maxIdxSyn = 0, maxIdxHttp = 0; + + fp = pcap_open_offline(filename, errbuf); + if (fp == NULL) { + fprintf(stderr, "\npcap_open_offline() failed: %s\n", errbuf); + return 0; + } + + if (pcap_loop(fp, 0, packetHandler, (u_char*)(&our_ipv4)) < 0) { + fprintf(stderr, "\npcap_loop() failed: %s\n", pcap_geterr(fp)); + return 0; + } + + return 0; +} + + + + diff --git a/book-user-code/polynomial.h b/book-user-code/polynomial.h new file mode 100644 index 000000000000..3babe72a4166 --- /dev/null +++ b/book-user-code/polynomial.h @@ -0,0 +1,27 @@ +// Simple pseudo-random uint8, 32, 64, based on crc32c 
etc. +// Copyright 2021 Richard L. Sites + +#ifndef __POLYNOMIAL_H__ +#define __POLYNOMIAL_H__ + +#include "basetypes.h" + +// x should be declared uint8 +#define POLY8 (0x1d) // From CRC-8-SAE J1850 +#define POLYSHIFT8(x) ( ((x) << 1) ^ static_cast((static_cast((x)) >> 7) & POLY8) ) +#define POLYINIT8 (0xffu) + +// x should be declared uint32 +#define POLY32 (0x04c11db7) // From CRC-32 +#define POLYSHIFT32(x) ( ((x) << 1) ^ static_cast((static_cast((x)) >> 31) & POLY32) ) +#define POLYINIT32 (0xffffffffu) + +// x should be declared uint64 +#define POLY64 (0x42F0E1EBA9EA3693lu) // From CRC-64-ECMA +#define POLYSHIFT64(x) ( ((x) << 1) ^ static_cast((static_cast((x)) >> 63) & POLY64) ) +#define POLYINIT64 (0xfffffffffffffffflu) + +#endif // __POLYNOMIAL_H__ + + + diff --git a/book-user-code/postproc3.sh b/book-user-code/postproc3.sh new file mode 100755 index 000000000000..76ad473009f0 --- /dev/null +++ b/book-user-code/postproc3.sh @@ -0,0 +1,19 @@ +#!/bin/bash +# arg 1 filename stem (no .trace), arg 2 "title", arg 3/4 spantrim args + +export LC_ALL=C + +cat $1.trace |./rawtoevent |sort -n |./eventtospan3 "$2" |sort >$1.json +echo " $1.json written" + +trim_arg='0' +if [ -n "$3" ] +then +delimit=' ' +trim_arg=$3$delimit$4 +fi + +cat $1.json |./spantotrim $trim_arg |./makeself show_cpu.html >$1.html +echo " $1.html written" + +google-chrome $1.html & diff --git a/book-user-code/qt_20210206_085042_dclab-2_19105.html b/book-user-code/qt_20210206_085042_dclab-2_19105.html new file mode 100644 index 000000000000..7f1b2fbf9c23 --- /dev/null +++ b/book-user-code/qt_20210206_085042_dclab-2_19105.html @@ -0,0 +1,8523 @@ + + + + + + + + + + + + + + + + + + + + + +       +Annot: + + + +   Option: + + + + + + + + + + + + +   Search: + + + + +usec: + + +.. + + + + + +
+ + + + + + +
+ + + + + + + + + + + + + + Shift-click 1-5 to save, click to restore. Axes: scroll wheel to zoom, + drag to pan. Items: shift-click-unclick to annotate. + Shift-click-unshift to keep, shift-drag to measure. + Red dot resets. + [more] + + +
+   +------------------------------------------------------------------------+
+   |            (0) UI controls (HTML)                                      |
+   +------------------------------------------------------------------------+
+   |            (1) Title                                                   |
+   +----+--------------------------------------------------------------+----+
+   |(2) |                                                              |(4) |
+   |Y-  |       (3) Main SVG drawing area                              |IPC |
+   |axis|                                                              |    |
+   |    |                                                              |    |
+   +----+--------------------------------------------------------------+----+
+   |            (5) X-axis                                                  |
+   +----+--------------------------------------------------------------+----+
+   |            (6) UI hint text (HTML)                                     |
+   +----+--------------------------------------------------------------+----+
+
+   List of UI active areas
+   In general, click toggles buttons while shift-click cycles through more choices
+   In general, shift-click-unclick to annotate/highlight; shift-click-unshift to 
+     keep multiple results onscreen at once
+
+   (0) Text and buttons
+     User annotate PID names	 off/on
+     All  annotate all items	 off/on
+
+     Mark mark_x events	 	 off/both/text/numbers
+     Arc  wakeup arcs		 off/on
+     Wait Reasons for not exec.	 off/on  Always on now
+     Slow CPU clock freq	 off/gradient
+
+     IPC  instructions per cycle off/both/user/kernel
+     Samp PC samples		 off/on
+     ovhd Approx.overhead times	 off/on  Always on now
+     Lock lock-held lines	 off/on
+     CB   color blind colors	 off/on
+
+     Search:    regex string match 
+     !	        invert search	 off/on, like grep -v
+     usec:      match only event duration in [lo..hi]
+  
+   (1) Title	text size	1.0/1.25/1.50/0.75 times default
+  
+   (2) Y-axis group triangles	collapse/expand/gray=one-fourth/gray=one-twentieth
+       Y-axis labels		mouse and wheel to pan/zoom vertically
+       Y-axis labels		shift-click:normal/highlight
+  
+   (3) anywhere 		mouse and wheel to pan/zoom horizontally	
+     Red dot 			resets display
+     Yellow overlay indicates slow CPU clock rate (power saving)
+    
+   (5) Basetime date		normal/relative to row start
+  
+   (6) Buttons
+     1 2 3 4 5			shift-click:save, click:restore current view
+     double-arrow		toggles between last two views
+     [more]			this text off/on
+
+International Morse Code (for wait events)
+  A    .-    B    -...  Cpu  -.-.  Disk -.. 
+  E    .     F    ..-.  G    --.   H    ....
+  I    ..    J    .---  tasK -.-   Lock .-..
+  Mem  --    Net  -.    O    ---   Pipe .--.
+  Q    --.-  R    .-.   Sche ...   Time -   
+  U    ..-   V    ...-  W    .--   X    -..-
+  Y    -.--  Z    --..
+
+
+ +
+ + + + + diff --git a/book-user-code/qt_20210206_085326_dclab-2_19423.html b/book-user-code/qt_20210206_085326_dclab-2_19423.html new file mode 100644 index 000000000000..39220d46a264 --- /dev/null +++ b/book-user-code/qt_20210206_085326_dclab-2_19423.html @@ -0,0 +1,8523 @@ + + + + + + + + + + + + + + + + + + + + + +       +Annot: + + + +   Option: + + + + + + + + + + + + +   Search: + + + + +usec: + + +.. + + + + + +
+ + + + + + +
+ + + + + + + + + + + + + + Shift-click 1-5 to save, click to restore. Axes: scroll wheel to zoom, + drag to pan. Items: shift-click-unclick to annotate. + Shift-click-unshift to keep, shift-drag to measure. + Red dot resets. + [more] + + +
+   +------------------------------------------------------------------------+
+   |            (0) UI controls (HTML)                                      |
+   +------------------------------------------------------------------------+
+   |            (1) Title                                                   |
+   +----+--------------------------------------------------------------+----+
+   |(2) |                                                              |(4) |
+   |Y-  |       (3) Main SVG drawing area                              |IPC |
+   |axis|                                                              |    |
+   |    |                                                              |    |
+   +----+--------------------------------------------------------------+----+
+   |            (5) X-axis                                                  |
+   +----+--------------------------------------------------------------+----+
+   |            (6) UI hint text (HTML)                                     |
+   +----+--------------------------------------------------------------+----+
+
+   List of UI active areas
+   In general, click toggles buttons while shift-click cycles through more choices
+   In general, shift-click-unclick to annotate/highlight; shift-click-unshift to 
+     keep multiple results onscreen at once
+
+   (0) Text and buttons
+     User annotate PID names	 off/on
+     All  annotate all items	 off/on
+
+     Mark mark_x events	 	 off/both/text/numbers
+     Arc  wakeup arcs		 off/on
+     Wait Reasons for not exec.	 off/on  Always on now
+     Slow CPU clock freq	 off/gradient
+
+     IPC  instructions per cycle off/both/user/kernel
+     Samp PC samples		 off/on
+     ovhd Approx.overhead times	 off/on  Always on now
+     Lock lock-held lines	 off/on
+     CB   color blind colors	 off/on
+
+     Search:    regex string match 
+     !	        invert search	 off/on, like grep -v
+     usec:      match only event duration in [lo..hi]
+  
+   (1) Title	text size	1.0/1.25/1.50/0.75 times default
+  
+   (2) Y-axis group triangles	collapse/expand/gray=one-fourth/gray=one-twentieth
+       Y-axis labels		mouse and wheel to pan/zoom vertically
+       Y-axis labels		shift-click:normal/highlight
+  
+   (3) anywhere 		mouse and wheel to pan/zoom horizontally	
+     Red dot 			resets display
+     Yellow overlay indicates slow CPU clock rate (power saving)
+    
+   (5) Basetime date		normal/relative to row start
+  
+   (6) Buttons
+     1 2 3 4 5			shift-click:save, click:restore current view
+     double-arrow		toggles between last two views
+     [more]			this text off/on
+
+International Morse Code (for wait events)
+  A    .-    B    -...  Cpu  -.-.  Disk -.. 
+  E    .     F    ..-.  G    --.   H    ....
+  I    ..    J    .---  tasK -.-   Lock .-..
+  Mem  --    Net  -.    O    ---   Pipe .--.
+  Q    --.-  R    .-.   Sche ...   Time -   
+  U    ..-   V    ...-  W    .--   X    -..-
+  Y    -.--  Z    --..
+
+
+ +
+ + + + + diff --git a/book-user-code/queuetest.cc b/book-user-code/queuetest.cc new file mode 100644 index 000000000000..098010d543f5 --- /dev/null +++ b/book-user-code/queuetest.cc @@ -0,0 +1,693 @@ +// Little program to exercise queues +// Copyright 2021 Richard L. Sites +// +// This program cranks up several tasks that service "work" entries on queues, +// each task looping CPU-bound for a specified number of microseconds and then +// passing the work task on to a subsequent queue. The last queue is always +// number zero, which finishes the work entry and deletes it. +// The main program produces N work entries and then exits. Each piece of work +// is a pseudo-RPC. It is logged at beginning and end, using the dclab_log +// framework that is used for other RPCs. +// +// Multiple work entries are active at once and they may interfere with each +// other. Some entries will wait in queues for previously-queued work. The +// net effect is that some work entries will take much longer than might be +// expected. +// +// Using KUtrace with this program will show the dynamics that lead to such +// long transaction delays. +// +// Work entries are produced by the main program at varying intervals, some of +// which are quite short. The entries specify varying sequences of queues to +// sequence through and varying amounts of "work" for each queue task. The +// variations come in two forms, uniform pseudo-random intervals, and skewed +// ones. The sequences also come in uniform and skewed forms, with the latter +// putting more work into some queues and less work into others. 
+// +// Command-line parameters: +// -rate generate approximately num transactions per second +// -n generate num transactions and then stop +// -skew use skewed intervals and queues (default is uniform) +// -s show acquire/release for spinlocks (debug aid) +// -v verbose +// +// Outputs: +// dclab transaction log file written to constructed file name of form +// queuetest_20210126_145625_dclab-2_2614.log +// +// Number of transactions, number dropped as too busy +// +// compile with g++ -O2 -pthread queuetest.cc dclab_log.cc dclab_rpc.cc kutrace_lib.cc -o queuetest + +#include + +#include +#include +#include +#include +#include // for SYS_xxx definitions +#include // for nanosleep +#include // for syscall + +#include "basetypes.h" +#include "dclab_log.h" +#include "dclab_rpc.h" +#include "kutrace_lib.h" +#include "polynomial.h" +#include "timecounters.h" + +using std::string; + +static const int kMaxTransInFlight = 40; + +static const uint32 kMaxShortQUsec = 1000; // Uniform/skew average 0.5/0.75 * this +static const uint32 kMaxLongQUsec = 4000; // Uniform/skew average 0.5/0.75 * this + +static const int kIterations = 140; // NOTE: 140 is ~1 usec loop on Intel i3 + // Adjust as needed + + +// Queue tasks use futex to wait on empty queues + +// Queue zero task launches and also terminates transactions. It keeps a count +// of transactions in flight and rejects new transactions with "too busy" +// status code if the number in flight is above a specified limit. If the rate +// of new transactions exceeds the rate of finishing transactions, the number +// in flight will saturate near this limit. This models real datacenter +// overload behavior that would result in HTTP 503 "Service Unavailable" error +// response status codes. + +// +// Work item. 
It includes next pointer for queueing +// +typedef struct { + uint32 queue_num; // Queue number to be on + uint32 usec_busy; // How much work to do +} OneWork; + +typedef struct WorkT { + WorkT* next; + int trans_num; // To trackhow long each takes + OneWork onework[4]; // Up to four steps of work to do + BinaryLogRecord log; // Mostly to log start/stop times as pseudo-RPC +} Work; + + +// +// Simple queue of work to do +// Only manipulated after acquiring lock (i.e. lock is set) +// +typedef struct { + Work* head; + Work* tail; + int count; // Number of items on this queue + char lock; +} Queue; + +typedef struct { + Queue* queue; // Array of all queues (so work can be passed around) + FILE* logfile; // Output log file (PrimaryTask writes once per transaction) + uint32 i; // Which queue we are processing +} PerThreadData; + + + +// +// Work patterns +// +// Queue[0] is start/stop +// Queues [1..3] are shorter amount of work +// Queues [4..7] are longer amount of work + +// Average M is 1000+4000 = 5000us +// At average time = M * 0.5, each transaction is about 2.5 ms, or 400/sec per CPU +// This gives about 1600 trans/sec for 4 CPUs. 
Any rate faster than this will +// get behind and start dropping work +static const OneWork kUniformWorkPattern[16][4] = { + {{1, kMaxShortQUsec}, {4, kMaxLongQUsec}, {0, 0}, {0, 0}}, + {{2, kMaxShortQUsec}, {5, kMaxLongQUsec}, {0, 0}, {0, 0}}, + {{3, kMaxShortQUsec}, {4, kMaxLongQUsec}, {0, 0}, {0, 0}}, + {{1, kMaxShortQUsec}, {5, kMaxLongQUsec}, {0, 0}, {0, 0}}, + {{2, kMaxShortQUsec}, {4, kMaxLongQUsec}, {0, 0}, {0, 0}}, + {{3, kMaxShortQUsec}, {5, kMaxLongQUsec}, {0, 0}, {0, 0}}, + {{1, kMaxShortQUsec}, {4, kMaxLongQUsec}, {0, 0}, {0, 0}}, + {{2, kMaxShortQUsec}, {5, kMaxLongQUsec}, {0, 0}, {0, 0}}, + + {{3, kMaxShortQUsec}, {4, kMaxLongQUsec}, {0, 0}, {0, 0}}, + {{1, kMaxShortQUsec}, {5, kMaxLongQUsec}, {0, 0}, {0, 0}}, + {{2, kMaxShortQUsec}, {4, kMaxLongQUsec}, {0, 0}, {0, 0}}, + {{3, kMaxShortQUsec}, {5, kMaxLongQUsec}, {0, 0}, {0, 0}}, + {{1, kMaxShortQUsec}, {4, kMaxLongQUsec}, {0, 0}, {0, 0}}, + {{2, kMaxShortQUsec}, {5, kMaxLongQUsec}, {0, 0}, {0, 0}}, + {{3, kMaxShortQUsec}, {4, kMaxLongQUsec}, {0, 0}, {0, 0}}, + {{1, kMaxShortQUsec}, {5, kMaxLongQUsec}, {0, 0}, {0, 0}}, +}; + +// Average M is 1000 + 4000 + 4000/4 = 6000us +// At average time = M * 0.75, each transaction is about 4.5 ms, or 222/sec per CPU +// This gives about 888 trans/sec for 4 CPUs. 
Any rate faster than this will
+// get behind and start dropping work
+static const OneWork kSkewedWorkPattern[16][4] = {
+  {{1, kMaxShortQUsec}, {4, kMaxLongQUsec}, {0, 0}, {0, 0}},
+  {{2, kMaxShortQUsec}, {5, kMaxLongQUsec}, {0, 0}, {0, 0}},
+  {{1, kMaxShortQUsec}, {6, kMaxLongQUsec}, {0, 0}, {0, 0}},
+  {{3, kMaxShortQUsec}, {4, kMaxLongQUsec}, {5, kMaxLongQUsec}, {0, 0}},
+  {{1, kMaxShortQUsec}, {4, kMaxLongQUsec}, {0, 0}, {0, 0}},
+  {{2, kMaxShortQUsec}, {5, kMaxLongQUsec}, {0, 0}, {0, 0}},
+  {{1, kMaxShortQUsec}, {6, kMaxLongQUsec}, {0, 0}, {0, 0}},
+  {{3, kMaxShortQUsec}, {4, kMaxLongQUsec}, {5, kMaxLongQUsec}, {0, 0}},
+
+  {{1, kMaxShortQUsec}, {4, kMaxLongQUsec}, {0, 0}, {0, 0}},
+  {{2, kMaxShortQUsec}, {5, kMaxLongQUsec}, {0, 0}, {0, 0}},
+  {{1, kMaxShortQUsec}, {6, kMaxLongQUsec}, {0, 0}, {0, 0}},
+  {{3, kMaxShortQUsec}, {4, kMaxLongQUsec}, {5, kMaxLongQUsec}, {0, 0}},
+  {{1, kMaxShortQUsec}, {4, kMaxLongQUsec}, {0, 0}, {0, 0}},
+  {{2, kMaxShortQUsec}, {5, kMaxLongQUsec}, {0, 0}, {0, 0}},
+  {{1, kMaxShortQUsec}, {6, kMaxLongQUsec}, {0, 0}, {0, 0}},
+  {{3, kMaxShortQUsec}, {4, kMaxLongQUsec}, {5, kMaxLongQUsec}, {0, 0}},
+};
+
+// Globals
+static bool nevertrue; // Set false in main program, but compiler doesn't know the value
+
+static bool trace_spinlocks = false;
+static bool verbose = false;
+
+static volatile int pending_count = 0;
+static volatile int dropped_count = 0;
+static int64* delay_times; // In usec
+static int64* transaction_times; // In usec
+
+static char serial_lock = 0; // For debugging, uncomment the uses of this
+
+inline uint64 uint32max(uint32 a, uint32 b) {return (a > b) ? a : b;}
+inline uint64 uint32min(uint32 a, uint32 b) {return (a < b) ? a : b;}
+
+// Each helper extracts a different bit field of one pseudo-random word:
+//   Rrange8 = bits 0..7, Rscale4 = bits 8..11, Rdelay8 = bits 24..31
+inline uint32 Rrange8(uint32 rand) {return rand & 0xFF;}
+inline uint32 Rscale4(uint32 rand) {return (rand >> 8) & 0xF;}
+inline uint32 Rdelay8(uint32 rand) {return (rand >> 24) & 0xFF;}
+
+// Advance the pseudo-random state in place by three POLYSHIFT32 steps
+void UpdateRand(uint32* rand) {
+  *rand = POLYSHIFT32(*rand);
+  *rand = POLYSHIFT32(*rand);
+  *rand = POLYSHIFT32(*rand);
+}
+
+// Sleep for n microseconds
+void usecsleep(uint32 usec) {
+  struct timespec ts;
+  ts.tv_sec = usec / 1000000;
+  ts.tv_nsec = (usec % 1000000) * 1000;
+  nanosleep(&ts, NULL);
+}
+
+// Returns average waiting usec to get the approximate given rate of transactions/sec
+// Uniform rate is M * 0.5, where M is the maximum delay
+// Skewed rate is M * 0.75
+// So for 1000/sec, uniform is 2000000 usec / 1000 gives M = 2000 usec
+// So for 1000/sec, skewed is 4000000 usec / 3000 gives M = 1333 usec
+//
+// A rate of zero (e.g. a non-numeric -rate argument run through atoi) is
+// treated as 1/sec instead of dividing by zero.
+uint32 RateToMax(uint32 rate, bool skew) {
+  if (rate == 0) {rate = 1;}
+  if (!skew) {return 2000000 / rate;}
+  return 4000000 / (rate * 3);
+}
+
+// For skewed distribution, use range [0..max*8)
+// Input value uniform is already M * 0.5
+// The 4-bit scale field picks a multiplier of 0.5, 1, 2, 4 or 8
+uint32 Skewed(uint32 rand, uint32 uniform) {
+  uint32 scale = Rscale4(rand);
+  if (scale & 1) {return uniform >> 1;} // xxx1 8*0.5 = 4
+  if (scale & 2) {return uniform;}      // xx10 4*1 = 4
+  if (scale & 4) {return uniform << 1;} // x100 2*2 = 4
+  if (scale & 8) {return uniform << 2;} // 1000 1*4 = 4
+  return uniform << 3;                  // 0000 1*8 = 8
+  // avg = 24/16 = 1.5*uniform = 0.75 * M
+}
+
+// For uniform distribution, use range [0..max)
+uint32 GetWorkRand(uint32 rand, int max, bool skew) {
+  // max * [0..255]/256
+  uint32 uniform = (max * Rrange8(rand)) >> 8;
+  if (!skew) {return uniform;} // M * 0.5 avg
+  return Skewed(rand, uniform); // M * 0.75
+}
+
+// Result in microseconds, [0..max) for uniform
+uint32 GetDelayRand(uint32 rand, int max, bool skew) {
+  // max * [0..255]/256
+  uint32 uniform = (max * Rdelay8(rand)) >> 8;
+  if (!skew) {return uniform;} // M * 0.5 avg
+  return Skewed(rand, uniform); // M * 0.75
+}
+
+
+//
+// Plain spinlock. 
It just covers some few-instruction sequences so almost never blocks +// +// The constructor acquires the spinlock and the destructor releases it. +// Thus, just declaring one of these in a block makes the block run *only* when +// holding the lock and then reliably release it at block exit +class PlainSpinLock { +public: + PlainSpinLock(volatile char* lock); + ~PlainSpinLock(); + volatile char* lock_; +}; + +PlainSpinLock::PlainSpinLock(volatile char* lock) { + lock_ = lock; + bool already_set; + if (trace_spinlocks) kutrace::mark_b("a"); + do { + while (*lock_ != 0) {} // Spin without writing while someone else holds the lock + // Try to get the lock + already_set = __atomic_test_and_set(lock_, __ATOMIC_ACQUIRE); + } while (already_set); + if (trace_spinlocks) kutrace::mark_b("/"); +} + +PlainSpinLock::~PlainSpinLock() { + if (trace_spinlocks) kutrace::mark_b("r"); + __atomic_clear(lock_, __ATOMIC_RELEASE); +} + +// +// Simple queue of work to do +// + +void InitQueue(Queue* queue) { + memset(queue, 0, sizeof(Queue)); +} + +void DumpQueues(FILE* f, const char* label, Queue* queue) { + for (int i = 0; i < 8; ++i) { + PlainSpinLock spinlock(&queue[i].lock); + fprintf(f, "%s dumpQueues[%d] ", label, i); + const Work* p = queue[i].head; + while (p != NULL) { + fprintf(f, "%08x ", p->log.rpcid); + p = p->next; + } + fprintf(f, "\n"); + } +} + + +void EnqueueBad(Work* item, Queue* queue, int queue_num) { + PlainSpinLock spinlock(&queue->lock); +////fprintf(stderr, "Enqueue %08x on %d\n", item->log.rpcid, queue_num); + + item->next = NULL; + if (queue->head == NULL) { + queue->head = item; + } else { + queue->tail->next = item; + } + queue->tail = item; + ++queue->count; + kutrace::addevent(KUTRACE_ENQUEUE, queue_num); + syscall(SYS_futex, &queue->count, FUTEX_WAKE, 0, NULL, NULL, 0); + // BUG + // We are still holding the spinlock when FUTEX_WAKE returns. Awakening process + // may spin a little. Or a WHOLE LOT if we get context switched out inside futex... 
+} + +void EnqueueFixed(Work* item, Queue* queue, int queue_num) { + do { + PlainSpinLock spinlock(&queue->lock); +////fprintf(stderr, "Enqueue %08x on %d\n", item->log.rpcid, queue_num); + + item->next = NULL; + if (queue->head == NULL) { + queue->head = item; + } else { + queue->tail->next = item; + } + queue->tail = item; + ++queue->count; + } while(false); + // Spinlock is now released. + kutrace::addevent(KUTRACE_ENQUEUE, queue_num); + syscall(SYS_futex, &queue->count, FUTEX_WAKE, 0, NULL, NULL, 0); +} + +#ifdef FIXED +inline void Enqueue(Work* item, Queue* queue, int queue_num) {EnqueueFixed(item, queue, queue_num);} +#else +inline void Enqueue(Work* item, Queue* queue, int queue_num) {EnqueueBad(item, queue, queue_num);} +#endif + +Work* Dequeue(Queue* queue, int queue_num) { + PlainSpinLock spinlock(&queue->lock); + kutrace::addevent(KUTRACE_DEQUEUE, queue_num); + + Work* item = queue->head; + queue->head = item->next; // Note: When this goes NULL, tail is garbage + --queue->count; +////fprintf(stderr, "Dequeue %08x from %d\n", item->log.rpcid, queue_num); + return item; +} + +uint32 GetRpcid(uint32* rand) { + uint32 retval = *rand; + UpdateRand(rand); + return retval; +} + +void InitWork(Work* work) { + memset(work, 0, sizeof(Work)); +} + +void DumpWork(FILE* f, const Work* work, bool brief) { + if (brief) { + fprintf(f, "%5d: ", work->log.rpcid); + for (int i = 0; i < 4; ++i) { + fprintf(f, "%u %u ", work->onework[i].queue_num, work->onework[i].usec_busy); + } + fprintf(f, "\n"); + return; + } + + fprintf(f, "DumpWork\n"); + for (int i = 0; i < 4; ++i) { + fprintf(f, "%u %u ", work->onework[i].queue_num, work->onework[i].usec_busy); + } + fprintf(f, "\n"); + PrintLogRecord(f, &work->log); +} + +Work* CreateWork(int trans_num, uint32* rand, bool skew) { + Work* work = new Work; + InitWork(work); + work->trans_num = trans_num; + // Fill in the logrecord fields + work->log.rpcid = rpcid32_to_rpcid16(GetRpcid(rand)); + work->log.req_send_timestamp = 
GetUsec(); + work->log.lglen1 = TenLg(sizeof(Work)); + work->log.lglen2 = work->log.lglen1; + strcpy(work->log.method, "Work"); + work->log.datalength = sizeof(Work); + + // Fill in the actual work specification from selected pattern + uint32 select = Rscale4(*rand); // Which of 16 patterns to use + UpdateRand(rand); + + const OneWork* pattern = (skew ? kSkewedWorkPattern : kUniformWorkPattern)[select]; + for (int i = 0; i < 4; ++i) { + work->onework[i].queue_num = pattern[i].queue_num; + work->onework[i].usec_busy = GetDelayRand(*rand, pattern[i].usec_busy, skew); + UpdateRand(rand); + } + +if (verbose) DumpWork(stderr, work, true); + // DumpWork(stderr, work, false); + return work; +} + +void DeleteWork(Work* work) { + delete work; +} + +// Constructs N work entries and send them to primary queue +void GenerateLoop(int n, uint32 rate, bool skew, Queue* primaryqueue) { + uint32 rand = POLYINIT32; + uint32 max_delay_usec = RateToMax(rate, skew); + + for (int i = 0; i < n; ++i) { + kutrace::mark_d(pending_count); + Work* work = CreateWork(i, &rand, skew); + + kutrace::addname(KUTRACE_METHODNAME, work->log.rpcid, work->log.method); + kutrace::addevent(KUTRACE_RPCIDREQ, work->log.rpcid); + Enqueue(work, primaryqueue, 0); + kutrace::addevent(KUTRACE_RPCIDREQ, 0); + + // Wait xx microseconds + uint32 wait_usec = GetDelayRand(rand, max_delay_usec, skew); + UpdateRand(&rand); + delay_times[i] = wait_usec; + usecsleep(wait_usec); + } + + // Wait for transactions to finish, pending_count == 0, before returning + // MINOR BUG: this can stop early if the first RPC has not yet been pulled off by PrimaryTask, + // which makes pending_count non-zero... however, we wait several usec before getting here. + // But if we increment pending-count here, we can queue more than 50 items and then + // PrimaryTask will delete the early ones instead of the late ones. + // On the other hand, a real client would be getting response messages so no problem... 
+ kutrace::mark_a("finish"); + while (pending_count != 0) {} + + kutrace::mark_a("/"); +} + + +// PrimaryTask launches and terminates work, logging each begin and end. +// Every work entry comes here twice, at beginning and end. +// A special "stop" work entry causes PrimaryTask to wait until all previous +// work entries have finished and then it terminates. +void* PrimaryTask(void* arg) { + PerThreadData* perthreaddata = reinterpret_cast(arg); + int ii = perthreaddata->i; + Queue* queue = perthreaddata->queue; + Queue* myqueue = &queue[ii]; + FILE* logfile = perthreaddata->logfile; + fprintf(stderr, " PrimaryTask starting, queue %d\n", ii); + + // Loop: + // Remove queue entry (waits if empty) + // If new request + // Set req_rcv_timestamp + // if pending_count at kMaxTransInFlight, + // increment dropped_count + // set status = "too busy" + // go straight to completion, dropping the request + // Increment pending_count + // Enqueue to first work queue + // If completed request + // Decrement pending_count + // Set resp_send_timestamp, resp_rcv_timestamp + // log the request + + do { + while(myqueue->count == 0) { + // Wait for some work + syscall(SYS_futex, &myqueue->count, FUTEX_WAIT, 0, NULL, NULL, 0); + } + // We have a real work item now + // No locks are needed around pending_count because we are the only thread that changes it. + Work* item = Dequeue(myqueue, ii); + kutrace::addevent(KUTRACE_RPCIDREQ, item->log.rpcid); +////fprintf(stderr, "PrimaryTask[%d], pending %d\n", ii, pending_count); +////DumpWork(stderr, item, true); + + uint32 next_q = item->onework[0].queue_num; + if (next_q != 0) { + // There is work to do on initial queue N, but we might be too busy + item->log.req_rcv_timestamp = GetUsec(); + ++pending_count; + if (pending_count <= kMaxTransInFlight) { + // Not too busy. 
Move the item to another queue + Enqueue(item, &queue[next_q], next_q); + kutrace::addevent(KUTRACE_RPCIDREQ,0); + continue; + } else { + ++dropped_count; + item->log.status = TooBusyStatus; + kutrace::mark_c("drop"); + } + } + + // All done with this item or too busy. Finish up, log, and free + item->log.type = RespRcvType; + item->log.resp_send_timestamp = GetUsec(); + item->log.resp_rcv_timestamp = item->log.resp_send_timestamp + 0.000001; + fwrite(&item->log, 1, sizeof(BinaryLogRecord), logfile); + transaction_times[item->trans_num] = item->log.resp_rcv_timestamp - item->log.req_send_timestamp; + --pending_count; + DeleteWork(item); + kutrace::addevent(KUTRACE_RPCIDREQ,0); + } while (true); +} + + +double fdiv_wait_usec(uint32 usec) { + double divd = 123456789.0; + for (int i = 0; i < (usec * kIterations); ++i) { + divd /= 1.0000001; + divd /= 0.9999999; + } + if (nevertrue) {fprintf(stderr, "%f\n", divd);} // Make live + return divd; // Make live (but only if caller uses it) +} + + +// Worker task loops doing specified work on a given queue +void* WorkerTask(void* arg) { + PerThreadData* perthreaddata = reinterpret_cast(arg); + int ii = perthreaddata->i; + Queue* queue = perthreaddata->queue; + Queue* myqueue = &queue[ii]; + fprintf(stderr, " WorkerTask starting, queue %d\n", ii); + + // Loop: + // Remove queue entry (waits if empty) + // KUtrace the rpcid + // Do "work" for N microseconds + // pop work off the list + // KUtrace back to idle (rpcid 0) + // Enqueue on next queue + + do { + while(myqueue->count == 0) { + // Wait for some work + syscall(SYS_futex, &myqueue->count, FUTEX_WAIT, 0, NULL, NULL, 0); + } + // We have a real work item now; primary inserted the method name + Work* item = Dequeue(myqueue, ii); +////fprintf(stderr, "WorkerTask[%d]\n", ii); +////DumpWork(stderr, item, true); + kutrace::addevent(KUTRACE_RPCIDREQ, item->log.rpcid); + uint32 for_q = item->onework[0].queue_num; + if (for_q != ii) { + fprintf(stderr, "BUG. 
Work for queue %d but on queue %d\n", for_q, ii); + } + uint32 usec = item->onework[0].usec_busy; + + // Fake "work" for N microseconds + double unused = fdiv_wait_usec(usec); + + // Pop the list + item->onework[0] = item->onework[1]; + item->onework[1] = item->onework[2]; + item->onework[2] = item->onework[3]; + item->onework[3].queue_num = 0; + item->onework[3].usec_busy = 0; + + // On to the next queue; queue[0] will terminate item + uint32 next_q = item->onework[0].queue_num; + Enqueue(item, &queue[next_q], next_q); + kutrace::addevent(KUTRACE_RPCIDREQ, 0); + } while (true); +} + +// Argument queue points to the array of queues +typedef void* (*QueueTask)(void*); +void CreateThreadForQueue(int i, Queue* queue, FILE* logfile, QueueTask qt) { + // Allocate a per-thread data structure and fill it in + PerThreadData* perthreaddata = new PerThreadData; + perthreaddata->i = i; + perthreaddata->queue = queue; + perthreaddata->logfile = logfile; + pthread_t thread; + int iret = pthread_create( &thread, NULL, qt, (void*) perthreaddata); + if (iret != 0) {Error("pthread_create()", iret);} +} + +int main (int argc, const char** argv) { + // Self-tracing if KUtrace module is loaded + kutrace::goipc(argv[0]); + + Queue queue[8]; // queue[0] feeds the primary task; [7] is unused + for (int i = 0; i < 8; ++i) { + InitQueue(&queue[i]); + } + + // To make things live in fdiv_wait_usec + nevertrue = (GetUsec() == 0); // Compiler doen't know this is false + + // Parse command line + uint32 n = 100; // Default + uint32 rate = 1000; // Default + bool skew = false; // Default + for (int i = 1; i < argc; ++i) { + if ((strcmp(argv[i], "-n") == 0) && (i < argc-1)) { + n = atoi(argv[++i]); + } + if ((strcmp(argv[i], "-rate") == 0) && (i < argc-1)) { + rate = atoi(argv[++i]); + } + if (strcmp(argv[i], "-skew") == 0) {skew = true;} + if (strcmp(argv[i], "-s") == 0) {trace_spinlocks = true;} + if (strcmp(argv[i], "-v") == 0) {verbose = true;} + } +fprintf(stderr, "n/rate/skew %u %u %u\n", 
n, rate, skew); + + // Set up globals + pending_count = 0; + dropped_count = 0; + + delay_times = new int64[n]; + transaction_times = new int64[n]; + +#if 0 +// Calibrate fdiv loop +fprintf(stderr, "%016lld usec before nominal one second loop\n", GetUsec()); +double unused = fdiv_wait_usec(1000000); +fprintf(stderr, "%016lld usec after\n", GetUsec()); +#endif + + // Open log file + const char* fname = MakeLogFileName(argv[0]); + FILE* logfile = OpenLogFileOrDie(fname); + + // Spawn eight queue tasks + // PrimaryTask(&queue[0]) + // for i=1..6 WorkerTask(&queue[i]) + for (int i = 0; i < 7; ++i) { + fprintf(stderr, "queuetest: launching a thread to process queue %d\n", i); + char temp[64]; + snprintf(temp, 64, "queue~%d", i); + kutrace::addname(KUTRACE_QUEUE_NAME, i, temp); + CreateThreadForQueue(i, queue, logfile, (i == 0) ? PrimaryTask : WorkerTask); + } + + // Produce n transactions + // Wait for pending_count to drop to zero + GenerateLoop(n, rate, skew, &queue[0]); + + fprintf(stderr, "\n%d transactions, %d dropped\n", n, dropped_count); + + // Close log file + fclose(logfile); + + // Calculate a few statistics + int64 sum_delay = 0; + int64 sum_trans = 0; + for (int i = 0; i < n; ++i) { + sum_delay += delay_times[i]; + sum_trans += transaction_times[i]; + } + fprintf(stdout, "\n"); + + fprintf(stdout, "Delays (usec), total = %lld, average = %lld\n", sum_delay, sum_delay / n); + if (verbose) { + for (int i = 0; i < n; ++i) { + fprintf(stdout, "%lld ", delay_times[i]); + if ((i % 20) == 19) {fprintf(stdout, "\n");} + } + fprintf(stdout, "\n"); + } + + fprintf(stdout, "Transactions (usec), total = %lld, average = %lld\n", sum_trans, sum_trans / n); + if (verbose) { + for (int i = 0; i < n; ++i) { + fprintf(stdout, "%lld ", transaction_times[i]); + if ((i % 20) == 19) {fprintf(stdout, "\n");} + } + fprintf(stdout, "\n"); + } + + delete[] delay_times; + delete[] transaction_times; + // Get log file name near the end of theprintout + fprintf(stdout, " %s written\n", 
fname); + + // Self-tracing + char namebuf[256]; + kutrace::stop(kutrace::MakeTraceFileName("qt", namebuf)); + + // Exit, deleting the spawned tasks + return 0; +} + + diff --git a/book-user-code/queuetest_bad_20210206_085042_dclab-2_19105_q.html b/book-user-code/queuetest_bad_20210206_085042_dclab-2_19105_q.html new file mode 100644 index 000000000000..21c36b4d61e7 --- /dev/null +++ b/book-user-code/queuetest_bad_20210206_085042_dclab-2_19105_q.html @@ -0,0 +1,2853 @@ + + + + + + + + + + + + + + + + + + + + +File: + + +color: + + + + + + + +search: + + + +usec: + +.. + + + +Matches: + +
+ + +
+ +
+
+ + + + + + + diff --git a/book-user-code/queuetest_good_20210206_085326_dclab-2_19423_q.html b/book-user-code/queuetest_good_20210206_085326_dclab-2_19423_q.html new file mode 100644 index 000000000000..ea2dc97117d4 --- /dev/null +++ b/book-user-code/queuetest_good_20210206_085326_dclab-2_19423_q.html @@ -0,0 +1,2839 @@ + + + + + + + + + + + + + + + + + + + + +File: + + +color: + + + + + + + +search: + + + +usec: + +.. + + + +Matches: + +
+ + +
+ +
+
+ + + + + + + diff --git a/book-user-code/rawtoevent.cc b/book-user-code/rawtoevent.cc new file mode 100644 index 000000000000..b287407c9838 --- /dev/null +++ b/book-user-code/rawtoevent.cc @@ -0,0 +1,1337 @@ +// Little program to turn raw binary dclab trace files into Ascii event listings +// The main work is turning truncated cycle times into multiples of 10ns +// Copyright 2021 Richard L. Sites +// +// Input has filename like +// kutrace_control_20170821_095154_dclab-1_2056.trace +// +// Compile with g++ -O2 rawtoevent.cc from_base40.cc kutrace_lib.cc -o rawtoevent +// +// od -Ax -tx8z -w32 foo.trace +// + + + +#include +#include +#include + +#include +#include // exit +#include +#include +#include // getpid gethostname +#include // gettimeofday +#include + +#include "basetypes.h" +#include "from_base40.h" +#include "kutrace_control_names.h" +#include "kutrace_lib.h" + + +/* Amount to shift cycle counter to get 20-bit timestamps */ +/* 4 bits = ~ 2.56 GHz/16 ~ 6nsec tick resolution */ +/* 6 bits = ~ 2.56 GHz/64 ~ 24nsec tick resolution */ +/* 8 bits = ~ 2.56 GHz/256 ~ 100nsec tick resolution */ +/* 12 bits = ~ 2.56 GHz/4096 ~ 1.6 usec tick resolution */ +/* THIS MUST MATCH the value in the kernel tracing module/code */ + +// Global for debugging +bool verbose = false; +bool hexevent = false; + + +//VERYTEMP +//static const uint64 FINDME = 1305990942; +static const uint64 FINDME = 0; + +static const bool TRACEWRAP = false; + +static const int kMAX_CPUS = 80; + +static const int mhz_32bit_cycles = 54; + +static const int kNetworkMbPerSec = 1000; // Default: 1 Gb/s + + +// Version 3 all values are pre-shifted + +#define IPC_Flag 0x80 +#define WRAP_Flag 0x40 +#define Unused2_Flag 0x20 +#define Unused1_Flag 0x10 +#define VERSION_MASK 0x0F + +#define RDTSC_SHIFT 0 +#define OLD_RDTSC_SHIFT 6 + + +// Module, control must be at least version 3 +static const int kRawVersionNumber = 3; + +static const char* kIdleName = "-idle-"; + + +// Very first block layout June 2018, 
called 12/6 headers +// Enables wraparound +// flags = x3 hex +// +-------+-----------------------+-------------------------------+ +// | cpu# | cycle counter | 0 module +// +-------+-----------------------+-------------------------------+ +// | flags | gettimeofday | 1 DoDump +// +-------+-----------------------+-------------------------------+ +// | start cycle counter | 2 DoDump +// +-------------------------------+-------------------------------+ +// | start gettimeofday | 3 DoDump +// +-------------------------------+-------------------------------+ +// | stop cycle counter | 4 DoDump +// +-------------------------------+-------------------------------+ +// | stop gettimeofday | 5 DoDump +// +-------------------------------+-------------------------------+ +// | u n u s e d | 6 +// +-------------------------------+-------------------------------+ +// | u n u s e d | 7 +// +===============================+===============================+ +// | u n u s e d | PID | 8 module +// +-------------------------------+-------------------------------+ +// | u n u s e d | 9 module +// +-------------------------------+-------------------------------+ +// | | 10 module +// + pidname + +// | | 11 module +// +-------------------------------+-------------------------------+ +// | followed by trace entries... 
| +// ~ ~ +// +// +// All other blocks layout June 2018 +// +-------+-----------------------+-------------------------------+ +// | cpu# | cycle counter | 0 module +// +-------+-----------------------+-------------------------------+ +// | flags | gettimeofday | 1 DoDump +// +===============================+===============================+ +// | u n u s e d | PID | 2 module +// +-------------------------------+-------------------------------+ +// | u n u s e d | 3 module +// +-------------------------------+-------------------------------+ +// | | 4 module +// + pidname + +// | | 5 module +// +-------------------------------+-------------------------------+ +// | followed by trace entries... | +// ~ ~ +// + + +// MWAIT notes: +// $ cat /proc/cpuinfo +// processor : 0 +// vendor_id : GenuineIntel +// cpu family : 6 +// model : 60 ==> 0x3C +// model name : Intel(R) Celeron(R) CPU G1840 @ 2.80GHz +// +// ./drivers/idle/intel_idle.c +// ICPU(0x3c, idle_cpu_hsw), + +// static struct cpuidle_state hsw_cstates[] = { +// These latencies are documented as usec, but I think they are 100ns increments... +// mwait(32), hda_29 13.9us table: 133 +// mwait(32), hda_29 13.3us table: 133 +// mwait(16), hda_29 4.0us table: 33 +// mwait(16), hda_29 3.75us table: 33 +// mwait(1), hda_29 1.74us table: 10 +// mwait(1), hda_29 1.76us table: 10 + +// "C1-HSW", 0x00, .exit_latency = 2, // usec ? 
+// "C1E-HSW", 0x01, .exit_latency = 10, +// "C3-HSW", 0x10, .exit_latency = 33, +// "C6-HSW", 0x20, .exit_latency = 133, +// "C7s-HSW", 0x32, .exit_latency = 166, +// "C8-HSW", 0x40, .exit_latency = 300, +// "C9-HSW", 0x50, .exit_latency = 600, +// "C10-HSW", 0x60, .exit_latency = 2600, + + + +using std::map; +using std::set; +using std::string; + +static double kDefaultSlope = 0.000285714; // 1/3500, dclab-3 at 3.5 GHz + +// Number of uint64 values per trace block +static const int kTraceBufSize = 8192; +// Number trace blocks per MB +static const double kTraceBlocksPerMB = 16.0; + +static const char* soft_irq_name[] = { + "hi", "timer", "tx", "rx", "block", "irq_p", "taskl", "sched", + "hrtim", "rcu", "", "", "", "", "", "" +}; + +typedef map U64toString; + +// These all use a single static buffer. In real production code, these would +// all be std::string values, or something else at least as safe. +static const int kMaxDateTimeBuffer = 32; +static char gTempDateTimeBuffer[kMaxDateTimeBuffer]; + +static const int kMaxPrintBuffer = 256; +static char gTempPrintBuffer[kMaxPrintBuffer]; + +// F(cycles) gives usec = base_usec + (cycles - base_cycles) * m; +typedef struct { + uint64 base_cycles; + uint64 base_usec; + uint64 base_cycles10; + uint64 base_nsec10; + double m_slope; + double m_slope_nsec10; +} CyclesToUsecParams; + +void SetParams(int64 start_cycles, int64 start_usec, + int64 stop_cycles, int64 stop_usec, CyclesToUsecParams* params) { + params->base_cycles = start_cycles; + params->base_usec = start_usec; + if (stop_cycles <= start_cycles) {stop_cycles = start_cycles + 1;} // avoid zdiv + params->m_slope = (stop_usec - start_usec) * 1.0 / (stop_cycles - start_cycles); + params->m_slope_nsec10 = params->m_slope * 100.0; + if (verbose) { + fprintf(stdout, "SetParams maps %18lldcy ==> %18lldus\n", start_cycles, start_usec); + fprintf(stdout, "SetParams maps %18lldcy ==> %18lldus\n", stop_cycles, stop_usec); + fprintf(stdout, " diff %18lldcy ==> %18lldus\n", 
stop_cycles - start_cycles, stop_usec - start_usec); + // Assume that cy increments every 64 CPU cycles + fprintf(stdout, "SetParams slope %f us/cy (%f MHz)\n", params->m_slope, 64.0/params->m_slope); + } +} + +void SetParams10(int64 start_cycles10, int64 start_nsec10, CyclesToUsecParams* params) { + params->base_cycles10 = start_cycles10; + params->base_nsec10 = start_nsec10; + if (verbose) { + fprintf(stdout, "SetParams10 maps %16lldcy ==> %lldns10\n", start_cycles10, start_nsec10); + } +} + +int64 CyclesToUsec(int64 cycles, const CyclesToUsecParams& params) { + int64 delta_usec = (cycles - params.base_cycles) * params.m_slope; + return params.base_usec + delta_usec; +} + +uint64 CyclesToNsec10(uint64 cycles, CyclesToUsecParams& params) { + int64 delta_nsec10 = (cycles - params.base_cycles10) * params.m_slope_nsec10; + return params.base_nsec10 + delta_nsec10; +} + +int64 UsecToCycles(int64 usec, CyclesToUsecParams& params) { + int64 delta_cycles = (usec - params.base_usec); + delta_cycles /= params.m_slope; // Combining above fails to convert double=>int64 + return params.base_cycles + delta_cycles; +} + + +// Turn seconds since the epoch into date_hh:mm:ss +// Not valid after January 19, 2038 +const char* FormatSecondsDateTime(int32 sec) { + if (sec == 0) {return "unknown";} // Longer spelling: caller expecting date + time_t tt = sec; + struct tm* t = localtime(&tt); + sprintf(gTempDateTimeBuffer, "%04d-%02d-%02d_%02d:%02d:%02d", + t->tm_year + 1900, t->tm_mon + 1, t->tm_mday, + t->tm_hour, t->tm_min, t->tm_sec); + return gTempDateTimeBuffer; +} + +// Turn usec since the epoch into date_hh:mm:ss.usec +const char* FormatUsecDateTime(int64 us) { + if (us == 0) {return "unknown";} // Longer spelling: caller expecting date + int32 seconds = us / 1000000; + int32 usec = us - (seconds * 1000000); + snprintf(gTempPrintBuffer, kMaxPrintBuffer, "%s.%06d", + FormatSecondsDateTime(seconds), usec); + return gTempPrintBuffer; +} + +// We wrapped if prior > now, except that 
we allow a modest amount of going backwards
+// because an interrupt entry can get recorded in the midst of recording say a
+// syscallentry, in which case the stored irq entry's timestamp may be later than
+// the subsequently-written syscall entry's timestamp. We allow 4K counts backward
+// (about 80 usec at nominal 20 ns/count). Count increment should be kept between
+// 10 nsec and 40 nsec.
+
+
+inline bool Wrapped(uint64 prior, uint64 now) {
+  if (prior <= now) {return false;} // Common case
+  return (prior > (now + 4096)); // Wrapped if prior is larger
+}
+
+// A user-mode-execution event is the pid number plus 64K
+// Only the low 16 bits of the pid are kept
+uint64 PidToEvent(uint64 pid) {return (pid & 0xFFFF) | 0x10000;}
+uint64 EventToPid(uint64 event) {return event & 0xFFFF;}
+
+// Event tests
+// Return true if the event is KUTRACE_MBIT_SEC
+inline bool is_cpu_description(uint64 event) {
+  if (event == KUTRACE_MBIT_SEC) {return true;}
+  return false;
+}
+
+// Return true if the event is a context switch (KUTRACE_USERPID)
+inline bool is_contextswitch(uint64 event) {return (event == KUTRACE_USERPID);}
+
+// Return true if the event is the idle task, pid 0
+inline bool is_idle(uint64 event) {return (event == 0x10000);}
+
+// Return true if the event is user-mode execution
+inline bool is_usermode(uint64 event) {return (event > 0xffff) && !is_idle(event);}
+
+// Return true if the event is a syscall/interrupt/trap
+// Calls have bit 0x0200 clear; the matching returns have it set
+inline bool is_call(uint64 event) {return (event <= 0xffff) && (KUTRACE_TRAP <= event) && ((event & 0x0200) == 0);}
+
+// Return true if the event is an optimized syscall/interrupt/trap with included return
+inline bool is_opt_call(uint64 event, uint64 delta_t) {return (delta_t > 0) && is_call(event);}
+
+// Return true if the event is a syscall/interrupt/trap return
+inline bool is_return(uint64 event) {return (event <= 0xffff) && (KUTRACE_TRAP <= event) && ((event & 0x0200) != 0);}
+
+// Return true if the event is a time pair
+inline bool is_timepair(uint64 event) {return (event & ~0x0f0) == KUTRACE_TIMEPAIR;}
+
+// Return true if the event is a name definition
+inline bool is_namedef(uint64 event) {return (0x010 <= event) && (event <= 0x1ff) && (event != KUTRACE_PC_TEMP);}
+
+// The name tests below mask with 0xf0f, so bits 4..7 of the event are ignored
+
+// Return true if the name event is a PID name definition
+inline bool is_pidnamedef(uint64 event) {return (event & 0xf0f) == 0x002;}
+
+// Return true if the name event is a method name definition
+inline bool is_methodnamedef(uint64 event) {return (event & 0xf0f) == 0x003;}
+
+// Return true if the name event is a lock name definition
+inline bool is_locknamedef(uint64 event) {return (event & 0xf0f) == 0x007;}
+
+// Return true if the name event is the kernel version
+inline bool is_kernelnamedef(uint64 event) {return (event & 0xf0f) == KUTRACE_KERNEL_VER;}
+
+// Return true if the name event is the CPU model name
+inline bool is_modelnamedef(uint64 event) {return (event & 0xf0f) == KUTRACE_MODEL_NAME;}
+
+// Return true if the name event is the host name
+inline bool is_hostnamedef(uint64 event) {return (event & 0xf0f) == KUTRACE_HOST_NAME;}
+
+// Return true if the name event is a queue name
+inline bool is_queuenamedef(uint64 event) {return (event & 0xf0f) == KUTRACE_QUEUE_NAME;}
+
+// Return true if the name event is a KUTRACE_RES_NAME definition
+inline bool is_resnamedef(uint64 event) {return (event & 0xf0f) == KUTRACE_RES_NAME;}
+
+
+// Return true if the event is a special marker (but not UserPidNum)
+inline bool is_special(uint64 event) {return (0x0200 < event) && (event <= KUTRACE_MAX_SPECIAL);}
+
+// Return true if the event is mark_a .. mark_d
+inline bool is_mark(uint64 event) {return ((0x020A <= event) && (event <= 0x020D));}
+
+// Return true if the event is mark_a mark_b mark_c
+inline bool is_mark_abc(uint64 event) {
+  return (event == 0x020A) || (event == 0x020B) || (event == 0x020C);
+}
+
+// Return true if the event is PC or PC_TEMP
+inline bool is_pc_sample(uint64 event) {
+  return (event == KUTRACE_PC_U) || (event == KUTRACE_PC_K) || (event == KUTRACE_PC_TEMP);
+}
+
+// Return true if the event is a local timer, for PC start_ts fixup
+inline bool is_timer_irq(uint64 event) {
+  return (event == kTIMER_IRQ_EVENT);
+}
+
+// Return true if the event is rpcreq, rpcresp, rpcmid, rpcrxpkt, rpctxpkt,
+// i.e. anything in the range KUTRACE_RPCIDREQ .. KUTRACE_RPCIDTXMSG
+inline bool has_rpcid(uint64 event) {
+  return (KUTRACE_RPCIDREQ <= event) && (event <= KUTRACE_RPCIDTXMSG);
+}
+
+// Return true if the event is raw kernel packet receive/send time and hash
+inline bool is_raw_pkt_hash(uint64 event) {
+  return (KUTRACE_RX_PKT <= event) && (event <= KUTRACE_TX_PKT);
+}
+
+// Return true if the event is user message receive/send time and hash
+inline bool is_user_msg_hash(uint64 event) {
+  return (KUTRACE_RX_USER <= event) && (event <= KUTRACE_TX_USER);
+}
+
+// Return true if the event is RPC message processing begin/end
+inline bool is_rpc_msg(uint64 event) {
+  return (KUTRACE_RPCIDREQ <= event) && (event <= KUTRACE_RPCIDRESP);
+}
+
+// Return true if the event is lock special
+inline bool is_lock(uint64 event) {
+  return (KUTRACE_LOCKNOACQUIRE <= event) && (event <= KUTRACE_LOCKWAKEUP);
+}
+
+// Return true if this event is irq call/ret to bottom half soft_irq handler (BH)
+// Clearing bit 0x0200 makes the call and its return compare equal
+inline bool is_bottom_half(uint64 event) {return (event & ~0x0200) == 0x5FF;}
+
+
+
+// Extract the trace file version number from the low four bits of the flags byte
+int TracefileVersion(uint8 flags) {
+  return flags & VERSION_MASK;
+}
+
+// Nonzero if the flags byte has IPC_Flag set
+int HasIPC(uint8 flags) {
+  return (flags & IPC_Flag) != 0;
+}
+
+// Nonzero if the flags byte has WRAP_Flag set
+int HasWraparound(uint8 flags) {
+  return (flags & WRAP_Flag) != 0;
+}
+
+
+# if 0
+// Change any spaces and non-Ascii to underscore
+// time dur event pid name(event) 
+void OutputName(FILE* f, uint64 nsec10, uint64 nameinsert, uint32 argall, const char* name) { + // Avoid crazy big times + if (nsec10 >= 99900000000LL) { + if (verbose) {fprintf(stdout, "BUG ts=%lld\n", nsec10);} + return; + } + + // One initial word plus 8 chars per word + uint64 len = ((strlen(name) + 7) >> 3) + 1; + uint64 duration = 1; + uint64 event = KUTRACE_PIDNAME; + // Look for lock name or kernel version or model name + if ((nameinsert & 0xF0000) == 0x20000) { + event = KUTRACE_LOCKNAME; + nameinsert &= 0xFFFF; + } + if ((nameinsert & 0xF0000) == 0x30000) { + event = KUTRACE_METHODNAME; + nameinsert &= 0xFFFF; + } + if ((nameinsert & 0xF0000) == 0x40000) { + event = KUTRACE_KERNEL_VER; + nameinsert &= 0xFFFF; + } + if ((nameinsert & 0xF0000) == 0x50000) { + event = KUTRACE_MODEL_NAME; + nameinsert &= 0xFFFF; + } + if ((nameinsert & 0xF0000) == 0x60000) { + event = KUTRACE_HOST_NAME; + nameinsert &= 0xFFFF; + } + if ((nameinsert & 0xF0000) == 0x70000) { + event = KUTRACE_QUEUE_NAME; + nameinsert &= 0xFFFF; + } + if ((nameinsert & 0xF0000) == 0x80000) { + event = KUTRACE_RES_NAME; + nameinsert &= 0xFFFF; + } + event |= (len << 4); + + fprintf(f, "%lld %lld %lld %d %s\n", + nsec10, duration, event, argall, name); + // Also put the name at the very front of the sorted event list + fprintf(f, "%lld %lld %lld %d %s\n", + -1ll, duration, event, argall, name); +} +#endif + +// Change any spaces and non-Ascii to underscore +// time dur event pid name(event) +void OutputName(FILE* f, uint64 nsec10, uint64 event, uint32 argall, const char* name) { + // Avoid crazy big times + if (nsec10 >= 99900000000LL) { + if (verbose) {fprintf(stdout, "BUG ts=%lld\n", nsec10);} + return; + } + + uint64 dur = 1; + // One initial word plus 8 chars per word + uint64 len = ((strlen(name) + 7) >> 3) + 1; + event = (event & 0xF0F) | (len << 4); // Set name length + + fprintf(f, "%lld %lld %lld %d %s\n", nsec10, dur, event, argall, name); + // Also put the name at the very front of the 
sorted event list + fprintf(f, "%lld %lld %lld %d %s\n", -1ll, dur, event, argall, name); +} + +// time dur event cpu pid rpc arg retval IPC name(event) +void OutputEvent(FILE* f, + uint64 nsec10, uint64 duration, uint64 event, uint64 current_cpu, + uint64 pid, uint64 rpc, + uint64 arg, uint64 retval, int ipc, const char* name) { + // Avoid crazy big times + bool fail = false; + if (nsec10 >= 99900000000LL) {fail = true;} + if (duration >= 99900000000LL) {fail = true;} + if (nsec10 + duration >= 99900000000LL) {fail = true;} + if (fail) { + if (verbose) {fprintf(stdout, "BUG %lld %lld\n", nsec10, duration);} + return; + } + + fprintf(f, "%lld %lld %lld %lld %lld %lld %lld %lld %d %s (%llx)\n", + nsec10, duration, event, current_cpu, + pid, rpc, + arg, retval, ipc, name, event); +} + +// Add the pid#/rpc#/etc. to the end of name, if not already there +string AppendNum(const string& name, uint64 num) { + char num_temp[24]; + sprintf(num_temp, ".%lld", num & 0xffff); + if (strstr(name.c_str(), num_temp) == NULL) { + return name + string(num_temp); + } + return name; +} + +// Add the pkt hash, etc. 
in hex to the end of name, if not already there +string AppendHexNum(const string& name, uint64 num) { + char num_temp[24]; + sprintf(num_temp, ".%04llX", num & 0xffff); + if (strstr(name.c_str(), num_temp) == NULL) { + return name + string(num_temp); + } + return name; +} + +// Change spaces and control codes to underscore +// Get rid of any high bits in names +string MakeSafeAscii(string s) { + for (int i = 0; i < s.length(); ++i) { + if (s[i] <= 0x20) {s[i] = '_';} + if (s[i] == '"') {s[i] = '_';} + if (s[i] == '\\') {s[i] = '_';} + s[i] &= 0x7f; + } + return s; +} + +bool Digit(char c) {return ('0' <= c) & (c <= '9');} + +string ReduceSpaces(string s) { + int k = 1; + int len = s.length(); + if (len < 3) {return s;} + // The very first character is unchanged + for (int i = 1; i < len - 1; ++i) { + if (s[i] != ' ') { + s[k++] = s[i]; + } else { + // Keep space (as underscore) only if between two digits + if (Digit(s[i - 1]) && Digit(s[i + 1])) { + s[k++] = '_'; + } + // Else drop the space + } + } + s[k++] = s[len - 1]; // The very last character + return s.substr(0, k); +} + +// +// Usage: rawtoevent +// +int main (int argc, const char** argv) { + // Some statistics + uint64 base_usec_timestamp; + uint64 event_count = 0; + uint64 lo_timestamp = 0x7FFFFFFFFFFFFFFFl; + uint64 hi_timestamp = 0; + set unique_cpus; + set unique_pids; + uint64 ctx_switches = 0; + uint64 total_marks = 0; + uint64 events_by_type[16]; // From high nibble of eventnum + memset(events_by_type, 0, 16 * sizeof(uint64)); + + uint64 current_cpu = 0; + uint64 traceblock[kTraceBufSize]; // 8 bytes per trace entry + uint8 ipcblock[kTraceBufSize]; // One byte per trace entry + + uint64 current_pid[kMAX_CPUS]; // Keep track of current PID on each of 16+ cores + uint64 current_rpc[kMAX_CPUS]; // Keep track of current rpcid on each of 1+6 cores + uint64 prior_timer_irq_nsec10[kMAX_CPUS]; // For moving PC sample start_ts back + bool at_first_cpu_block[kMAX_CPUS]; // To special-case the initial PID of 
each CPU in trace + U64toString names; // Name keyed by PID#, RPC# etc. with high type nibble + + // Start timepair is set by DoInit + // Stop timepair is set by DoOff + // If start_cycles is zero, we got here directly without calling DoInit, + // which was done in some earlier run of this program. In that case, go + // find the start pair as the first real trace entry in the first trace block. + CyclesToUsecParams params; + + // Events are 0..64K-1 for everything except context switch. + // Context switch events are 0x10000 + pid + // Initialize idle process name, pid 0 + names[0x10000] = string(kIdleName); + + for (int i = 0; i < kMAX_CPUS; ++i) { + current_pid[i] = 0; + current_rpc[i] = 0; + prior_timer_irq_nsec10[i] = 0; + at_first_cpu_block[i] = true; + } + + // For converting cycle counts to multiples of 100ns + double m = kDefaultSlope; + + FILE* f = stdin; + if (argc >= 2) { + f = fopen(argv[1], "rb"); + if (f == NULL) { + fprintf(stderr, "%s did not open\n", argv[1]); + exit(0); + } + } + + // Pick up flags + for (int i = 1; i < argc; ++i) { + if (strcmp(argv[i], "-v") == 0) {verbose = true;} + if (strcmp(argv[i], "-h") == 0) {hexevent = true;} + } + + int blocknumber = 0; + uint64 base_minute_usec, base_minute_cycle, base_minute_shift; + bool unshifted_word_0 = false; + + // Need this to sort in front of allthe timestamps + fprintf(stdout, "# ## VERSION: %d\n", kRawVersionNumber); + uint8 all_flags = 0; // They should all be the same + uint8 first_flags; // Just first block has tracefile version number + + + //--------------------------------------------------------------------------// + // Outer loop over blocks // + //--------------------------------------------------------------------------// + while (fread(traceblock, 1, sizeof(traceblock), f) != 0) { + // Need first [1] line to get basetime in later steps + // TODO: Move this to a stylized BASETIME comment + fprintf(stdout, "# blocknumber %d\n", blocknumber); + fprintf(stdout, "# [0] %016llx\n", 
traceblock[0]); + fprintf(stdout, "# [1] %s %02llx\n", + FormatUsecDateTime(traceblock[1] & 0x00fffffffffffffful), + traceblock[1] >> 56); + fprintf(stdout, + "# TS DUR EVENT CPU PID RPC ARG0 RETVAL IPC NAME (t and dur multiples of 10ns)\n"); + + if (verbose || hexevent) { + fprintf(stdout, "%% %02llx %014llx\n", traceblock[0] >> 56, traceblock[0] & 0x00fffffffffffffful); + fprintf(stdout, "%% %02llx %014llx\n", traceblock[1] >> 56, traceblock[1] & 0x00fffffffffffffful); + } +// +-------+-----------------------+-------------------------------+ +// | cpu# | cycle counter | 0 module +// +-------+-----------------------+-------------------------------+ +// | flags | gettimeofday | 1 DoDump +// +-------+-----------------------+-------------------------------+ + + // Pick out CPU number for this traceblock + current_cpu = traceblock[0] >> 56; + uint64 base_cycle = traceblock[0] & 0x00fffffffffffffful; + + // traceblock[1] has flags in top byte. + uint8 flags = traceblock[1] >> 56; + uint64 gtod = traceblock[1] & 0x00fffffffffffffful; + + // Sanity check. 
If fail, ignore this block + static const uint64 usec_per_100_years = 1000000LL * 86400 * 365 * 100; // Thru ~2070 + + bool fail = false; + if (kMAX_CPUS <= current_cpu) { + fprintf(stderr, "FAIL: block[%d] CPU number %lld > max %d\n", blocknumber, current_cpu, kMAX_CPUS); + fail = true; + } + // No constraints on base_cycle + // No constraints on flags + if (usec_per_100_years <= gtod) { + fprintf(stderr, "FAIL: block[%d] gettimeofday crazy large %016llx\n", blocknumber, gtod); + fail = true; + } + + + all_flags |= flags; + bool this_block_has_ipc = (HasIPC(flags)); + + // For each 64KB traceblock that has IPC_Flag set, also read the IPC bytes + if (this_block_has_ipc) { + // Extract 8KB IPC block + int n = fread(ipcblock, 1, sizeof(ipcblock), f); + } else { + memset(ipcblock, 0, sizeof(ipcblock)); // Default if no IPC data + } + +// WRAPAROUND PROBLEM: +// We pick base_minute_usec here in block 0, but it can be +// long before the real wrapped trace entries in blocks 1..N +// Our downstream display does badly with seconds much over 120... +// +// We would like the base_minute_usec to be set by the first real entry in block 1 instead... +// Can still use paramaters here for basic time conversion. +// Not much issue with overflow, I think. +// + + // If very first block, pick out time conversion parameters + int first_real_entry = 2; + bool very_first_block = (blocknumber == 0); + if (very_first_block) { + first_real_entry = 8; + + int64 start_cycles = traceblock[2]; + int64 start_usec = traceblock[3]; + int64 stop_cycles = traceblock[4]; + int64 stop_usec = traceblock[5]; + base_usec_timestamp = start_usec; + + // For Arm-32, the "cycle" counter is only 32 bits at 54 MHz, so wraps about every 75 seconds. + // This can leave stop_cycles small by a few multiples of 4G. We do a termpoary fix here + // for exactly 54 MHz. Later, we could find or take as input a different approximate + // counter frequency. 
+ bool has_32bit_cycles = ((start_cycles | stop_cycles) & 0xffffffff00000000llu) == 0; + if (has_32bit_cycles) { +fprintf(stderr, "has_32bit_cycles\n"); + uint64 elapsed_usec = (uint64)(stop_usec - start_usec); + uint64 elapsed_cycles = (uint64)(stop_cycles - start_cycles); + uint64 expected_cycles = elapsed_usec * mhz_32bit_cycles; +fprintf(stderr, " elapsed usec %lld\n", elapsed_usec); +fprintf(stderr, " elapsed cycles %lld\n", elapsed_cycles); +fprintf(stderr, " expected cycles %lld\n", expected_cycles); + // Pick off the high bits + uint64 approx_hi = expected_cycles & 0xffffffff00000000llu; + // Put them in + stop_cycles |= (int64)approx_hi; + // Cross-check and change by 1 if right at a boundary + // and off by more than 12.5% from expected MHz + elapsed_cycles = (uint64)(stop_cycles - start_cycles); +fprintf(stderr, " elapsed cycles %lld\n", elapsed_cycles); + uint64 ratio = elapsed_cycles / elapsed_usec; +fprintf(stderr, " ratio %lld\n", ratio); + if (ratio > (mhz_32bit_cycles + (mhz_32bit_cycles >> 3))) {stop_cycles -= 0x0000000100000000llu;} + if (ratio < (mhz_32bit_cycles - (mhz_32bit_cycles >> 3))) {stop_cycles += 0x0000000100000000llu;} + elapsed_cycles = (uint64)(stop_cycles - start_cycles); +fprintf(stderr, " elapsed cycles %lld\n", elapsed_cycles); + } + + if (verbose || hexevent) { + fprintf(stdout, "%% %016llx = %lldcy %lldus (%lld mod 1min)\n", + traceblock[2], start_cycles, start_usec, start_usec % 60000000l); + fprintf(stdout, "%% %016llx\n", traceblock[3]); + fprintf(stdout, "%% %016llx = %lldcy %lldus (%lld mod 1min)\n", + traceblock[4], stop_cycles, stop_usec, stop_usec % 60000000l); + fprintf(stdout, "%% %016llx\n", traceblock[5]); + fprintf(stdout, "%% %016llx unused\n", traceblock[6]); + fprintf(stdout, "%% %016llx unused\n", traceblock[7]); + fprintf(stdout, "\n"); + } + +// +-------+-----------------------+-------------------------------+ +// | cpu# | cycle counter | 0 module +// 
+-------+-----------------------+-------------------------------+ +// | flags | gettimeofday | 1 DoDump +// +-------------------------------+-------------------------------+ +// | start cycle counter | 2 DoDump +// +-------------------------------+-------------------------------+ +// | start gettimeofday | 3 DoDump +// +-------------------------------+-------------------------------+ +// | stop cycle counter | 4 DoDump +// +-------------------------------+-------------------------------+ +// | stop gettimeofday | 5 DoDump +// +-------------------------------+-------------------------------+ +// | u n u s e d | 6 +// +-------------------------------+-------------------------------+ +// | u n u s e d | 7 +// +-------------------------------+-------------------------------+ + + // More sanity checks. If fail, ignore this block + if (start_cycles > stop_cycles) { + fprintf(stderr, "FAIL: block[%d] start_cy > stop_cy %lld %lld\n", blocknumber, start_cycles, stop_cycles); +//VERYTEMP Arm32 wraparound 32-bit counter +// TODO: if cycle counter values afre all 32-bit, increase stop_cycles until +// apparent frequency vs. 
timeofday is between 25 and 100 MHz (10-40 nsec) + // fail = true; + } + if (start_usec > stop_usec) { + fprintf(stderr, "FAIL: block[%d] start_usec > stop_usec %lld %lld\n", blocknumber, start_usec, stop_usec); + fail = true; + } + if (usec_per_100_years <= start_cycles) { + fprintf(stderr, "FAIL: block[%d] start_cycles crazy large %016llx \n", blocknumber, start_cycles); + fail = true; + } + if (usec_per_100_years <= stop_cycles) { + fprintf(stderr, "FAIL: block[%d] stop_cycles crazy large %016llx \n", blocknumber, stop_cycles); + fail = true; + } + + if (fail) { + fprintf(stderr, "**** FAIL in block[0] is fatal ****\n"); + fprintf(stderr, " %016llx %016llx\n",traceblock[0], traceblock[1]); + exit(0); + } + + uint64 block_0_cycle = traceblock[0] & 0x00fffffffffffffful; + if ((block_0_cycle / start_cycles) > 1) { + // Looks like bastard file: word 0 is unshifted by mistake + unshifted_word_0 = true; + first_real_entry = 6; + } + + // Map start_cycles <==> start_usec + SetParams(start_cycles, start_usec, stop_cycles, stop_usec, ¶ms); + + // Round usec down to multiple of 1 minute + base_minute_usec = (start_usec / 60000000) * 60000000; + // Backmap base_minute_usec to cycles + base_minute_cycle = UsecToCycles(base_minute_usec, params); + + // Now instead map base_minute_cycle <==> 0 + SetParams10(base_minute_cycle, 0, ¶ms); + + first_flags = flags; +//fprintf(stderr, "first_flags %02x\n", first_flags); + } // End of block[0] preprocessing + + if (fail) { + fprintf(stderr, "**** FAIL -- skipping block[%d] ****\n", blocknumber); + fprintf(stderr, " %016llx %016llx\n",traceblock[0], traceblock[1]); + for (int i = 0; i < 16; ++i) {fprintf(stderr, " [%d] %016llu\n", i, traceblock[i]);} + ++blocknumber; + continue; + } + + // Pick out CPU number for this traceblock + current_cpu = traceblock[0] >> 56; + unique_cpus.insert(current_cpu); // stats + + // Pick out times for converting to 100Mhz + if (unshifted_word_0) {base_cycle >>= OLD_RDTSC_SHIFT;} + uint64 prepend = 
base_cycle & ~0xfffff; + + // The base cycle count for this block may well be a bit later than the truncated time + // in the first real entry, and may have wrapped in its low 20 bits. If so, the high bits + // we want to prepend should be one smaller. + uint64 first_timestamp = traceblock[first_real_entry] >> 44; + uint64 prior_t = first_timestamp; + + // If wraparound trace and in very_first_block, suppress everything except name entries + // and hardware description + bool keep_just_names = HasWraparound(first_flags) && very_first_block; + + if ((TracefileVersion(first_flags) >= 3) && !unshifted_word_0) { + /* Every block has PID and pidname at the front */ + /* CPU frequency may be in the first block per CPU, in the high half of pid */ + uint64 pid = traceblock[first_real_entry + 0] & 0x00000000ffffffffLLU; + uint64 freq_mhz = traceblock[first_real_entry + 0] >> 32; + uint64 unused = traceblock[first_real_entry + 1]; + char pidname[24]; + memcpy(pidname, reinterpret_cast(&traceblock[first_real_entry + 2]), 16); + pidname[16] = '\0'; +if (at_first_cpu_block[current_cpu]) { +fprintf(stderr, "cpu %lld pid %lld freq %lld %s\n", current_cpu, pid, freq_mhz, pidname); +} + + if (verbose || hexevent) { + fprintf(stdout, "%% %016llx pid %lld\n", traceblock[first_real_entry + 0], pid); + fprintf(stdout, "%% %016llx unused\n", traceblock[first_real_entry + 1]); + fprintf(stdout, "%% %016llx name %s\n", traceblock[first_real_entry + 2], pidname); + fprintf(stdout, "%% %016llx name\n", traceblock[first_real_entry + 3]); + fprintf(stdout, "\n"); + } +// Every block has PID and pidname at the front +// +-------+-----------------------+-------------------------------+ +// | cpu# | cycle counter | 0 module +// +-------+-----------------------+-------------------------------+ +// | flags | gettimeofday | 1 DoDump +// +-------------------------------+-------------------------------+ +// | u n u s e d | PID | 2 or 8 module +// 
+-------------------------------+-------------------------------+ +// | u n u s e d | 3 or 9 module +// +-------------------------------+-------------------------------+ +// | | 4 or 10 module +// + pidname + +// | | 5 or 11 module +// +-------------------------------+-------------------------------+ + + // Remember the name for this pid, except don't change pid 0 + uint64 nameinsert = PidToEvent(pid); + if (pid == 0) {strcpy(pidname, kIdleName);} + string name = MakeSafeAscii(string(pidname)); + names[nameinsert] = name; + + // To allow updates of the reconstruction stack in eventtospan + uint64 nsec10 = CyclesToNsec10(base_cycle, params); + OutputName(stdout, nsec10, KUTRACE_PIDNAME, pid, name.c_str()); + + // New user-mode process id, pid + unique_pids.insert(pid); // stats + if (current_pid[current_cpu] != pid) {++ctx_switches;} // stats + current_pid[current_cpu] = pid; + + uint64 event = KUTRACE_USERPID; // Context switch + uint64 duration = 1; + if (!keep_just_names) { + name = AppendNum(name, pid); + + // NOTE: OutputEvent here is likely a bug. Forcing a context switch at block boundary + // unfortunately has a later timestamp than the very first entry of the block + // because that entry's time was captured first, then reserve space which + // switches blocks and grabs a new time for the block PID, ~300ns later than + // the entry that is then going to be first-in-block. Hmmm. + // The effect is that first-entry = ctx switch gets LOST. + // Commenting out for the time being. dsites 2020.11.12. Fixes reconstruct bug. + // + // A possible alternate design is to back up the timestamp here to just before the + // first real entry. 
+ // + /////OutputEvent(stdout, nsec10, duration, event, current_cpu, + //// pid, 0, 0, 0, 0, name.c_str()); + + // Statistics: don't count as a context switch -- almost surely same + + // dsites 2021.07.26 + // Output the very first block's context switch to the running process at trace startup + // dsites 2021.10.20 Output initial CPU frequency if nonzero + if (at_first_cpu_block[current_cpu]) { + at_first_cpu_block[current_cpu] = false; + OutputEvent(stdout, nsec10, duration, KUTRACE_USERPID, current_cpu, + pid, 0, 0, 0, 0, name.c_str()); + if (0 < freq_mhz) { + OutputEvent(stdout, nsec10, duration, KUTRACE_PSTATE, current_cpu, + pid, 0, freq_mhz, 0, 0, "freq"); + } + } + } + + first_real_entry += 4; + } // End of each block preprocessing + + + // We wrapped if high bit of first_timestamp is 1 and high bit of base is 0 + if (Wrapped(first_timestamp, base_cycle)) { + prepend -= 0x100000; + if (TRACEWRAP) {fprintf(stdout, " Wrap0 %05llx %05llx\n", first_timestamp, base_cycle);} + } + + //------------------------------------------------------------------------// + // Inner loop over eight-byte entries // + //------------------------------------------------------------------------// + for (int i = first_real_entry; i < kTraceBufSize; ++i) { + int entry_i = i; // Always the first word, even if i subsequently incremented + bool has_arg = false; // Set true if low 32 bits are used + bool extra_word = false; // Set true if entry is at least two words + bool deferred_rpcid0 = false; + uint8 ipc = ipcblock[i]; + + // Completely skip any all-zero NOP entries + if (traceblock[i] == 0LLU) {continue;} + + // Skip the entire rest of the block if all-ones entry found + if (traceblock[i] == 0xffffffffffffffffLLU) {break;} + + // +-------------------+-----------+---------------+-------+-------+ + // | timestamp | event | delta | retval| arg0 | + // +-------------------+-----------+---------------+-------+-------+ + // 20 12 8 8 16 + + uint64 t = traceblock[i] >> 44; // Timestamp 
+ uint64 n = (traceblock[i] >> 32) & 0xfff; // event number + uint64 arg = traceblock[i] & 0x0000ffff; // syscall/ret arg/retval + uint64 argall = traceblock[i] & 0xffffffff; // mark_a/b/c/d, etc. + uint64 arg_hi = (traceblock[i] >> 16) & 0xffff; // rx_pkt tx_pkt lglen8 + uint64 delta_t = (traceblock[i] >> 24) & 0xff; // Opt syscall return timestamp + uint64 retval = (traceblock[i] >> 16) & 0xff; // Opt syscall retval + + // Completely skip any mostly-FFFF entries, but keep return of 32-bit -sched- + if ((t == 0xFFFFF) && (n == 0xFFF)) {continue;} + + // Sign extend optimized retval [-128..127] from 8 bits to 16 + retval = (uint64)(((int64)(retval << 56)) >> 56) & 0xffff; + if (verbose) { + fprintf(stdout, "%% [%d,%d] %05llx %03llx %04llx %04llx = %lld %lld %lld, %lld %lld %02x\n", + blocknumber, i, + (traceblock[i] >> 44) & 0xFFFFF, + (traceblock[i] >> 32) & 0xFFF, + (traceblock[i] >> 16) & 0xFFFF, + (traceblock[i] >> 0) & 0xFFFF, + t, n, delta_t, retval, arg, ipc); + } + + if (is_mark(n)) { + ++total_marks; // stats + } else { + ++events_by_type[n >> 8]; // stats + } + + uint64 event; + if (n == KUTRACE_USERPID) { // Context switch + has_arg = true; + // Change event to new process id + 64k + event = PidToEvent(arg); + } else { + // Anything else 0..64K-1 + event = n; + } + + // 2019.03.18 Go back to preserving KUTRACE_USERPID for eventtospan + event = n; + + // Convert truncated start time to full-width start time + // Increment the prepend if truncated time rolls over + if (Wrapped(prior_t, t)) {prepend += 0x100000;} + prior_t = t; + + // tfull is increments of cycles from the base minute for this trace, + // also expressed as increments of cycles + uint64 tfull = prepend | t; + + // nsec10 is increments of 10ns from the base minute. + // For a trace starting at 50 seconds into a minute and spanning 99 seconds, + // this reaches 14,900,000,000 which means the + // base minute + 149.000 000 00 seconds. More than 32 bits. 
+ uint64 nsec10 = CyclesToNsec10(tfull, params); + uint64 duration = 0; + + if (has_rpcid(n)) { + // Working on this RPC until one with arg=0 + has_arg = true; + // Defer switching to zero until after the OutputEvent + if (arg != 0) {current_rpc[current_cpu] = arg;} + else {deferred_rpcid0 = true;} + } + + // Pick out any name definitions + if (is_namedef(n)) { + has_arg = true; + // We have a name or other variable-length entry + // Remap the raw numbering to unique ranges in names[] + uint64 nameinsert; + uint64 rpcid; + uint8 lglen8; + if (is_pidnamedef(n)) { + nameinsert = PidToEvent(arg); // Processes 0..64K + } else if (is_locknamedef(n)) { + nameinsert = arg | 0x20000; // Lock names + } else if (is_methodnamedef(n)) { + rpcid = arg & 0xffff; // RPC method names + lglen8 = arg_hi; // may include TenLg msg len + nameinsert = rpcid | 0x30000; + } else if (is_kernelnamedef(n)) { + nameinsert = arg | 0x40000; // Kernel version + } else if (is_modelnamedef(n)) { + nameinsert = arg | 0x50000; // CPU model + } else if (is_hostnamedef(n)) { + nameinsert = arg | 0x60000; // CPU host name + } else if (is_queuenamedef(n)) { + nameinsert = arg | 0x70000; // Queue name + } else if (is_resnamedef(n)) { + nameinsert = arg | 0x80000; // Resource name + } else { + nameinsert = ((n & 0x00f) << 8) | arg; // Syscall, etc. 
Include type of name + } + + char tempstring[64]; + int len = (n >> 4) & 0x00f; + if ((len < 1) || (8 < len)) {continue;} + // Ignore any timepair but keep the names + if (!is_timepair(n)) { + memset(tempstring, 0, 64); + memcpy(tempstring, &traceblock[i + 1], (len - 1) * 8); + // Remember the name, except don't change pid 0 + // And throw away the empty name + if (nameinsert == 0x10000) {strcpy(tempstring, kIdleName);} + string name = string(tempstring); + if (is_kernelnamedef(n) || is_modelnamedef(n)) { + name = ReduceSpaces(name); + } + name = MakeSafeAscii(name); + if (!name.empty()) { + names[nameinsert] = name; + ////OutputName(stdout, nsec10, nameinsert, argall, name.c_str()); + OutputName(stdout, nsec10, n, argall, name.c_str()); + } + } + i += (len - 1); // Skip over the rest of the name event + extra_word = true; + continue; + } + + if (is_cpu_description(n)) { // Just pass it on to eventtospan + OutputEvent(stdout, nsec10, 1, event, current_cpu, + 0, 0, argall, 0, 0, ""); + } + + if (keep_just_names) {continue;} + + //======================================================================== + // Name definitions above skip this code, so do not affect lo/hi + if (lo_timestamp > nsec10) {lo_timestamp = nsec10;} // stats + if (hi_timestamp < nsec10) {hi_timestamp = nsec10;} // stats + + // Look for new user-mode process id, pid + if (is_contextswitch(n)) { + has_arg = true; + unique_pids.insert(arg); // stats + if (current_pid[current_cpu] != arg) {++ctx_switches;} // stats + current_pid[current_cpu] = arg; + } + + // Nothing else, so dump in decimal + // Here n is the original 12-bit event; event is (pid | 64K) if n is user-mode code + string name = string(""); + + // Put in name of event + if (is_return(n)) { + uint64 call_event = event & ~0x0200; + if (names.find(call_event) != names.end()) {name.append("/" + names[call_event]);} + } else { + if (names.find(event) != names.end()) {name.append(names[event]);} + } + + if (is_contextswitch(n)) { + has_arg = 
true; + uint64 target = PidToEvent(arg); + if (names.find(target) != names.end()) {name.append(names[target]);} + name = AppendNum(name, arg); + } + + if (is_usermode(event)) { + if (names.find(event) != names.end()) {name.append(names[event]);} + name = AppendNum(name, EventToPid(event)); + } + + // If this is an optimized call, pick out the duration and leave return value + // The ipc value for this is two 4-bit values: + // low bits IPC before call, high bits IPC within call + if (is_opt_call(n, delta_t)) { + has_arg = true; + // Optimized call with delta_t and retval + duration = CyclesToNsec10(tfull + delta_t, params) - nsec10; + if (duration == 0) {duration = 1;} // We enforce here a minimum duration of 10ns + } else { + retval = 0; + } + + // Remember timer interrupt start time, for PC sample fixup below + if (is_timer_irq(n)) { + prior_timer_irq_nsec10[current_cpu] = nsec10; + } + + // Pick off non-standard PC values here + // + // Either of two forms: + // (1) Possible future v4 with ts/event swapped + // +-----------+---+-----------------------------------------------+ + // | event |///| PC | + // +-----------+---+-----------------------------------------------+ + // 12 4 48 + // (2) Current scaffolding + // +-------------------+-----------+---------------+-------+-------+ + // | timestamp | event | delta | retval| arg0 | + // +-------------------+-----------+---------------+-------+-------+ + // | PC | + // +---------------------------------------------------------------+ + // 64 + // Just deal with form (2) right now + // + // 2021.04.05 We now include the CPU frequency sample as arg0 in this entry if nonzero. + // Extract it as a separate KUTRACE_PSTATE event. + // + if (is_pc_sample(n)) { + has_arg = true; + extra_word = true; + uint64 pc_sample = traceblock[++i]; // Consume second word, the PC sample + // Change to PC eventnum, either kernel or user sample address + event = n = (pc_sample & 0x8000000000000000LLU) ? 
KUTRACE_PC_K : KUTRACE_PC_U; + + // The PC sample is generated after the local_timer interrupt, but we really + // want its sample time to be just before that interrupt. We move it back here. + if (prior_timer_irq_nsec10[current_cpu] != 0) { + nsec10 = prior_timer_irq_nsec10[current_cpu] - 1; // 10 nsec before timer IRQ + } + uint64 freq_mhz = arg; + // Put a hash of the PC name into arg, so HTML display can choose colors quickly + arg = (pc_sample >> 6) & 0xFFFF; // Initial hash just uses PC bits <21:6> + // This is used for drawing color + // If addrtoline is used later, reset arg + retval = 0; + ipc = 0; + char temp_hex[24]; + sprintf(temp_hex, "PC=%012llx", pc_sample); // Normally 48-bit PC + name = string(temp_hex); + + // Output the frequency event first if nonzero + if (0 < freq_mhz) { + OutputEvent(stdout, nsec10, 1, KUTRACE_PSTATE, current_cpu, + current_pid[current_cpu], current_rpc[current_cpu], + freq_mhz, 0, 0, "freq"); + ++event_count; // stats + } + } + + // If this is a special event marker, keep the name and arg + if (is_special(n)) { + has_arg = true; + name.append(string(kSpecialName[n & 0x001f])); + if (has_rpcid(n)) { + name = AppendNum(names[arg | 0x30000], arg); // method.rpcid + } else if (is_lock(n)) { + name = string(kSpecialName[n & 0x001f]) + names[arg | 0x20000]; // try_lockname etc. 
+ } else if (is_raw_pkt_hash(n) || is_user_msg_hash(n)) { + uint64 hash16 = ((argall >> 16) ^ argall) & 0xffffLLU; // HTML shows this 16-bit hash + name = AppendHexNum(name, hash16); + } else if (n == KUTRACE_RUNNABLE) { + // Include which PID is being made runnable, from arg + name = AppendNum(name, arg); + } + if (duration == 0) {duration = 1;} // We enforce here a minimum duration of 10ns + } + + // If this is an unoptimized return, move the arg value to retval + if (is_return(n)) { + has_arg = true; + retval = arg; + arg = 0; + } + + // If this is a call to an irq bottom half routine, name it + if (is_bottom_half(n)) { + has_arg = true; + name.append(":"); + name.append(string(soft_irq_name[arg & 0x000f])); + } + + // If this is a packet rx or tx, remember the time + // Step (1) of RPC-to-packet correlation + // NOTE: the hash stored in KUTRACE_RX_PKT KUTRACE_TX_PKT is 32 bits + // Convention: hash16 is always shown in hex caps. Other numbers in decimal + if (is_raw_pkt_hash(n) || is_user_msg_hash(n)) { + arg = argall; // Retain all 32 bits in output + } + + // If this packet is an RPC processing start, look to create the message span + // arg is the rpcid and arg_hi is the 16-bit packet-beginning hash + // Step (3) of RPC-to-packet correlation + if (is_rpc_msg(n) && (arg != 0)) { + arg = argall; // Retain all 32 bits in output + } + + // MARK_A,B,C arg is six base-40 chars NUL, A_Z, 0-9, . - / + // MARK_D arg is unsigned int + // +-------------------+-----------+-------------------------------+ + // | timestamp | event | arg | + // +-------------------+-----------+-------------------------------+ + // 20 12 32 + if (is_mark_abc(n)) { + has_arg = true; + // Include the marker label string, from all 32 bits af argument + arg = argall; // Retain all 32 bits in output + name += "="; + char temp[8]; + name += Base40ToChar(arg, temp); + } + + // Debug output. 
Raw 64-bit event in hex + if (hexevent) { + fprintf(stdout, "%05llx.%03llx ", + (traceblock[entry_i] >> 44) & 0xFFFFF, + (traceblock[entry_i] >> 32) & 0xFFF); + if (has_arg) { + fprintf(stdout, " %04llx%04llx ", + (traceblock[entry_i] >> 16) & 0xFFFF, + (traceblock[entry_i] >> 0) & 0xFFFF); + } else { + fprintf(stdout, " "); + } + } + + // Output the trace event + // Output format: + // time dur event cpu pid rpc arg retval IPC name(event) + OutputEvent(stdout, nsec10, duration, event, current_cpu, + current_pid[current_cpu], current_rpc[current_cpu], + arg, retval, ipc, name.c_str()); + // Update some statistics + ++event_count; // stats + + if (hexevent && extra_word) { + fprintf(stdout, " %16llx\n", traceblock[entry_i + 1]); + } + + // Do deferred switch to rpcid = 0 + if (deferred_rpcid0) {current_rpc[current_cpu] = 0;} + + } + //------------------------------------------------------------------------// + // End inner loop over eight-byte entries // + //------------------------------------------------------------------------// + + ++blocknumber; + + } // while (fread... + //--------------------------------------------------------------------------// + // End outer loop over blocks // + //--------------------------------------------------------------------------// + + + fclose(f); + + // Pass along the OR of all incoming raw traceblock flags, in particular IPC_Flag + fprintf(stdout, "# ## FLAGS: %d\n", all_flags); + + + // Reduce timestamps to start at no more than 60 seconds after the base minute. + // With wraparound tracing, we don't know the true value of lo_timestamp until + // possibly the very last input block. So we offset here. The output file already + // has the larger times so eventtospan will reduce those. 
+ uint64 extra_minutes = lo_timestamp / 6000000000l; + uint64 offset_timestamp = extra_minutes * 6000000000l; + lo_timestamp -= offset_timestamp; + hi_timestamp -= offset_timestamp; + double lo_seconds = lo_timestamp / 100000000.0; + double hi_seconds = hi_timestamp / 100000000.0; +if (lo_seconds < 0.0) {fprintf(stderr,"BUG: lo_seconds < 0.0 %12.8f\n", lo_seconds);} +if (hi_seconds > 999.0) {fprintf(stderr,"BUG: hi_seconds > 999.0 %12.8f\n", hi_seconds);} + double total_seconds = hi_seconds - lo_seconds; + if (total_seconds <= 0.0) { + lo_seconds = 0.0; + hi_seconds = 1.0; + total_seconds = 1.0; // avoid zdiv + } + // Pass along the time bounds + fprintf(stdout, "# ## TIMES: %10.8f %10.8f\n", lo_seconds, hi_seconds); + + + uint64 total_cpus = unique_cpus.size(); + if (total_cpus == 0) {total_cpus = 1;} // avoid zdiv + + fprintf(stderr, "rawtoevent(%3.1fMB):\n", + blocknumber / kTraceBlocksPerMB); + fprintf(stderr, + " %s, %lld events, %lld CPUs (%1.0f/sec/cpu)\n", + FormatSecondsDateTime(base_usec_timestamp / 1000000), + event_count, total_cpus, (event_count / total_seconds) /total_cpus); + uint64 total_irqs = events_by_type[5] + events_by_type[7]; + uint64 total_traps = events_by_type[4] + events_by_type[6]; + uint64 total_sys64 = events_by_type[8] + events_by_type[9] + + events_by_type[10] + events_by_type[11]; + uint64 total_sys32 = events_by_type[12] + events_by_type[13] + + events_by_type[14] + events_by_type[15]; + + fprintf(stderr, " %lld IRQ, %lld Trap, %lld Sys64, %lld Sys32, %lld Mark\n", + total_irqs, total_traps, total_sys64, total_sys32, total_marks); + fprintf(stderr, " %lld PIDs, %lld context-switches (%1.0f/sec/cpu)\n", + (u64)unique_pids.size(), ctx_switches, (ctx_switches / total_seconds) / total_cpus); + fprintf(stderr, + " %5.3f elapsed seconds: %5.3f to %5.3f\n", + total_seconds, lo_seconds, hi_seconds); + +} + diff --git a/book-user-code/samptoname_k.cc b/book-user-code/samptoname_k.cc new file mode 100644 index 000000000000..036aeeb6a09e --- 
/dev/null +++ b/book-user-code/samptoname_k.cc @@ -0,0 +1,218 @@ +// Little program to paste in kernel names for PC addresses +// Copyright 2021 Richard L. Sites +// +// Filter from stdin to stdout +// One command-line parameter -- allsyms file name +// $ cat foo.json |./samptoname_k >foo_with_k_pc.json +// +// +// Compile with g++ -O2 samptoname_k.cc -o samptoname_k +// +// Input from stdin is a KUtrace json file, some of whose events are +// PC samples of kernel addresses. We want to rewrite these with the +// corresponding routine name, taken from the second input. +// +// ts dur cpu pid rpc event arg ret name--------------------> +// [ 0.00000000, 0.00400049, -1, -1, 33588, 641, 61259, 0, 0, "PC=ffffffffb43bd2e7"] +// +// Second input from filename is from +// sudo cat /proc/kallsyms |sort >somefile.txt +// +// ffffffffb43bd2a0 T clear_page_orig +// ffffffffb43bd2e0 T clear_page_erms +// ffffffffb43bd2f0 T cmdline_find_option_bool +// ffffffffb43bd410 T cmdline_find_option +// +// Output to stdout is the input json with names substituted and the +// hash code in arg updated +// [ 0.00000000, 0.00400049, -1, -1, 33588, 641, 12345, 0, 0, "PC=clear_page_erms"] +// + + +#include +#include + +#include +#include // exit +#include + +#include "basetypes.h" +#include "kutrace_lib.h" + +using std::string; +using std::map; + +typedef struct { + double start_ts; // Seconds + double duration; // Seconds + int64 start_ts_ns; + int64 duration_ns; + int cpu; + int pid; + int rpcid; + int eventnum; + int arg; + int retval; + int ipc; + string name; +} OneSpan; + +typedef map SymMap; + +// Add dummy entry that sorts last, then close the events array and top-level json +void FinalJson(FILE* f) { + fprintf(f, "[999.0, 0.0, 0, 0, 0, 0, 0, 0, 0, \"\"]\n"); // no comma + fprintf(f, "]}\n"); +} + +static const int kMaxBufferSize = 256; + +// Read next line, stripping any crlf. Return false if no more. 
// Read the next line of text from f into buffer (capacity maxsize bytes),
// removing one trailing LF and then one trailing CR if present.
// Returns false at end of file or on a read error, true otherwise.
// A line longer than maxsize-1 bytes comes back in pieces over successive
// calls, because fgets() stops when the buffer is full.
bool ReadLine(FILE* f, char* buffer, int maxsize) {
  if (fgets(buffer, maxsize, f) == NULL) {return false;}
  size_t len = strlen(buffer);
  // Check len before indexing: a bare "\n" line is empty once the LF is gone,
  // and looking at buffer[len - 1] then would touch the byte before the buffer.
  if ((0 < len) && (buffer[len - 1] == '\n')) {buffer[--len] = '\0';}
  if ((0 < len) && (buffer[len - 1] == '\r')) {buffer[--len] = '\0';}
  return true;
}
// Cheap 16-bit hash so we can mostly distinguish different routine names.
// Each byte is folded in as hash = (hash << 3) ^ byte using 64-bit arithmetic,
// so only the last 21-22 characters of a long name influence the result.
// The 64-bit value is then folded down to 32 and to 16 bits.
// Returns a value in 0..65535; the empty string hashes to 0.
int NameHash(const std::string& s) {
  unsigned long long hash = 0;
  for (size_t i = 0; i < s.length(); ++i) {
    unsigned char c = s[i];	// Make sure it is unsigned
    hash = (hash << 3) ^ c;	// ignores leading chars if 21 < len
  }
  hash ^= (hash >> 32);		// Fold down
  hash ^= (hash >> 16);
  return static_cast<int>(hash & 0xffff);
}
%s\n", n, buffer); + + if (n < 10) { + // Copy unchanged anything not a span + fprintf(stdout, "%s\n", buffer); + continue; + } + if (onespan.start_ts >= 999.0) {break;} // Always strip 999.0 end marker and stop + + if (onespan.eventnum == KUTRACE_PC_K) { + string oldname = onespan.name.substr(4); // Skip over "PC= + size_t quote2 = oldname.find("\""); + if (quote2 != string::npos) {oldname = oldname.substr(0, quote2);} + string newname = Lookup(oldname, allsyms); + if (!newname.empty()) { + onespan.name = "\"PC=" + newname + "\"],"; + onespan.arg = NameHash(newname); + } + } + +#if 1 + // Name has trailing punctuation, including ], + fprintf(stdout, "[%12.8f, %10.8f, %d, %d, %d, %d, %d, %d, %d, %s\n", + onespan.start_ts, onespan.duration, + onespan.cpu, onespan.pid, onespan.rpcid, onespan.eventnum, + onespan.arg, onespan.retval, onespan.ipc, onespan.name.c_str()); + ++output_events; +#endif + } + + // Add marker and closing at the end + FinalJson(stdout); + fprintf(stderr, "spantopcnamek: %d events\n", output_events); + + return 0; +} diff --git a/book-user-code/samptoname_u.cc b/book-user-code/samptoname_u.cc new file mode 100644 index 000000000000..b69dacf9d3e3 --- /dev/null +++ b/book-user-code/samptoname_u.cc @@ -0,0 +1,378 @@ +// Little program to paste in user names for PC addresses +// Copyright 2021 Richard L. Sites +// +// Filter from stdin to stdout +// One command-line parameter -- pidmaps file name +// +// +// TODO: +// cache lookup results to avoid excess command spawning +// +// Compile with g++ -O2 samptoname_u.cc -o samptoname_u +// +// Input from stdin is a KUtrace json file, some of whose events are +// PC samples of user-mode addresses. We want to rewrite these with the +// corresponding routine name, taken from the second input. 
+// +// ts dur cpu pid rpc event arg ret name--------------------> +// [ 26.65163778, 0.00400013, 2, 10129, 37094, 640, 58156, 0, 0, "PC=7f31f1f8cb1f"], +// +// Second input from filename is from +// sudo ls /proc/*/maps |xargs -I % sh -c 'echo "\n====" %; sudo cat %' >somefile.txt +// +// ==== /proc/10000/maps +// 5636c2def000-5636c2eac000 r-xp 00000000 08:02 5510282 /usr/sbin/sshd +// 5636c30ab000-5636c30ae000 r--p 000bc000 08:02 5510282 /usr/sbin/sshd +// 5636c30ae000-5636c30af000 rw-p 000bf000 08:02 5510282 /usr/sbin/sshd +// 5636c30af000-5636c30b8000 rw-p 00000000 00:00 0 +// 5636c31be000-5636c31ee000 rw-p 00000000 00:00 0 [heap] +// 7f0c2c372000-7f0c2c37c000 r-xp 00000000 08:02 5374723 /lib/x86_64-linux-gnu/security... +// address perms offset dev inode pathname +// see mmap(2) +// +// Note that we only care about the executable regions in the above: in r-xp the "x" +// +// Output to stdout is the input json with names substituted and the +// hash code in arg updated +// [ 0.00000000, 0.00400049, -1, -1, 33588, 641, 12345, 0, 0, "PC=memcpy-ssse3.S:1198"] +// + + +#include +#include + +#include +#include // exit +#include + +#include "basetypes.h" +#include "kutrace_lib.h" + +#define BUFFSIZE 256 +#define CR 0x0d +#define LF 0x0a + +static const int kMaxBufferSize = 256; + +using std::string; +using std::map; + +typedef struct { + double start_ts; // Seconds + double duration; // Seconds + int64 start_ts_ns; + int64 duration_ns; + int cpu; + int pid; + int rpcid; + int eventnum; + int arg; + int retval; + int ipc; + string name; +} OneSpan; + +// We are going to pack a PID and the low address of a range into a single 64 bit key: +// top 16 bits for PID, low 48 bits for address. This could be changed later +// to use a pair of uint64 or a single uint128 as key. 
// Read the next line of text from f into buffer (capacity maxsize bytes),
// removing one trailing LF and then one trailing CR if present.
// Returns false at end of file or on a read error, true otherwise.
// A line longer than maxsize-1 bytes comes back in pieces over successive
// calls, because fgets() stops when the buffer is full.
bool ReadLine(FILE* f, char* buffer, int maxsize) {
  if (fgets(buffer, maxsize, f) == NULL) {return false;}
  size_t len = strlen(buffer);
  // Check len before indexing: a bare "\n" line is empty once the LF is gone,
  // and looking at buffer[len - 1] then would touch the byte before the buffer.
  if ((0 < len) && (buffer[len - 1] == '\n')) {buffer[--len] = '\0';}
  if ((0 < len) && (buffer[len - 1] == '\r')) {buffer[--len] = '\0';}
  return true;
}
(addr_lo & 0x0000FFFFFFFFFFFFL); + + (*allmaps)[key] = temp; +//fprintf(stdout, "allmaps[%016lx] = ", key); +//DumpRangeToFile(stdout, temp); + } +} + + +// a must be the smaller +bool IsClose(uint32 a, uint32 b) { + return (b - a) < 12; // An arbitrary limit of 12 consecutive PIDs +} + +// Returns 0 if not valid hex +uint64 GetFromHex(const string& s) { + if (s.find_first_not_of("0123456789abcdef") != string::npos) { + return 0L; + } + uint64 addr = 0; + sscanf(s.c_str(), "%llx", &addr); + return addr; +} + +const RangeToFile* Lookup(int pid, uint64 addr, const MapsMap& allmaps) { + bool fail = false; + if (pid < 0) {fail = true;} + if (fail) {return NULL;} + + uint64 key = pid; + key = (key << 48) | (addr & 0x0000FFFFFFFFFFFFL); + MapsMap::const_iterator it = allmaps.upper_bound(key); // Just above key + it = prev(it); // At or just below key +//fprintf(stdout, "Lookup(%d %llx %llx) = ", pid, addr, key); +//DumpRangeToFile(stdout, it->second); + + // If process P spawns processes Q R and S, most often they will have PIDs P+1 P+2 and P+3 + // and of course the same shared memory map. In this case, the lookup above will hit on the + // entry just after the LAST entry for P before we call prev(). We can see that the PID for + // Q does not match the lookup result that is at the end of P, but we can see that Q's PID + // of P+1 is "close". If that happens, we try looking up again using pID P in the key. 
+ + if ((it->second.pid != pid) && (IsClose(it->second.pid, pid))) { + // Second try with the lower PID + uint64 maybeparentpid = it->second.pid; + key = (maybeparentpid << 48) | (addr & 0x0000FFFFFFFFFFFFL); + it = allmaps.upper_bound(key); // Just above key + it = prev(it); // At or just below key +//fprintf(stdout, "Lookup2(%lld %llx %llx) = ", maybeparentpid, addr, key); +//DumpRangeToFile(stdout, it->second); + if (it->second.pid != maybeparentpid) {fail = true;} + if (addr < it->second.addr_lo) {fail = true;} + if (it->second.addr_hi <= addr) {fail = true;} + } else { + // Double-check that we had a real hit + if (it->second.pid != pid) {fail = true;} + if (addr < it->second.addr_lo) {fail = true;} + if (it->second.addr_hi <= addr) {fail = true;} + } + + if (fail) { +//fprintf(stdout, " false hit\n"); + return NULL; + } + + return &it->second; +} + + +char* NoArgs(char* procname) { + char* paren = strchr(procname, '('); + if (paren != NULL) {*paren = '\0';} + return procname; +} + +// Expecting two lines, the procedure name (from -f) and the file:line# +// If file:line# is unknown (not enough debug info), then it is ??:? +// The demangled (from -C) procedure name may have argument types. 
+// The file name does not have the full path (from -s) +// If file:line# is known, use it, else use procedure name up to any parenthesis +const char* GetProcFileName(const char* cmd, char* buffer) { + FILE* fp = popen(cmd, "r"); + + if (fp == NULL) { + //fprintf(stderr, "Pipe did not open\n"); + return NULL; + } + + size_t n = fread(buffer, 1, BUFFSIZE, fp); + pclose(fp); + buffer[n] = '\0'; + + char* lf = strchr(buffer, LF); + if (lf != NULL) {lf[0] = '\0';} + if (lf == NULL) { // Not even one complete line + return NULL; + } + if (lf == &buffer[n]){ // One line + return NoArgs(buffer); + } + // Two lines, as expected + char* fileline = &lf[1]; + lf = strchr(fileline, LF); + if (lf != NULL) {lf[0] = '\0';} +//VERYTEMP always return routine name +return NoArgs(buffer); + + if (memcmp(fileline, "??:?", 4) == 0) { + return NoArgs(buffer); + } + return fileline; +} + +const char* DoAddr2line(const string& pathname, uint64 offset, char* buffer) { + char cmd[256]; + sprintf(cmd, "addr2line -fsC -e %s %llx", pathname.c_str(), offset); + return GetProcFileName(cmd, buffer); +} + + +// Cheap 16-bit hash so we can mostly distinguish different routine names +int NameHash(const string& s) { + uint64 hash = 0L; + for (int i = 0; i < s.length(); ++i) { + uint8 c = s[i]; // Make sure it is unsigned + hash = (hash << 3) ^ c; // ignores leading chars if 21 < len + } + hash ^= (hash >> 32); // Fold down + hash ^= (hash >> 16); + int retval = static_cast(hash & 0xffffL); + return retval; +} + +void PossiblyReplaceName(OneSpan* onespan, const MapsMap& allmaps) { + string oldname = onespan->name.substr(4); // Skip over "PC= + size_t quote2 = oldname.find("\""); + if (quote2 != string::npos) {oldname = oldname.substr(0, quote2);} // Chop trailing "... 
+ uint64 addr = GetFromHex(oldname); + if (addr == 0L) {return;} // Not a hex address that we can map + const RangeToFile* rtf = Lookup(onespan->pid, addr, allmaps); + if (rtf == NULL) {return;} // Nothing found by lookup + + // We now have the pathname of an executable image containing the address + // WE ARE NOT DONE YET. This is just the exec file name + string pathname = rtf->pathname; + uint64 offset = addr - rtf->addr_lo; + + // Now execute command: addr2line -fsC -e /lib/x86_64-linux-gnu/libc-2.27.so 0x18eb1f + // and parse the result into filename:line# or procname + char buffer[256]; + const char* newname = DoAddr2line(pathname, offset, buffer); + if (newname != NULL) { + // Fixup non-debug libc mapping memcpy into __nss_passwd_lookup + if (strcmp(newname, "__nss_passwd_lookup") == 0) {newname = "memcpy";} + onespan->name = string("\"PC=") + newname + "\"],"; + onespan->arg = NameHash(newname); +//fprintf(stdout, "%s => %s\n", oldname.c_str(), newname); + } +} + + + +// Input is a json file of spans +// start time and duration for each span are in seconds +// Output is a smaller json file of fewer spans with lower-resolution times +void Usage() { + fprintf(stderr, "Usage: spantopcnameu \n"); + exit(0); +} + +// +// Filter from stdin to stdout +// +int main (int argc, const char** argv) { + if (argc < 2) {Usage();} + + // Input allmaps file + MapsMap allmaps; + + const char* fname = argv[1]; + FILE* f = fopen(fname, "r"); + if (f == NULL) { + fprintf(stderr, "%s did not open\n", fname); + exit(0); + } + ReadAllmaps(f, &allmaps); + fclose(f); + + + // expecting: + // ts dur cpu pid rpc event arg ret name--------------------> + // [ 0.00000000, 0.00400049, -1, -1, 33588, 641, 61259, 0, 0, "PC=ffffffffb43bd2e7"], + + int output_events = 0; + char buffer[kMaxBufferSize]; + while (ReadLine(stdin, buffer, kMaxBufferSize)) { + char buffer2[256]; + buffer2[0] = '\0'; + OneSpan onespan; + int n = sscanf(buffer, "[%lf, %lf, %d, %d, %d, %d, %d, %d, %d, %s", + 
&onespan.start_ts, &onespan.duration, + &onespan.cpu, &onespan.pid, &onespan.rpcid, + &onespan.eventnum, &onespan.arg, &onespan.retval, &onespan.ipc, buffer2); + onespan.name = string(buffer2); + // fprintf(stderr, "%d: %s\n", n, buffer); + + if (n < 10) { + // Copy unchanged anything not a span + fprintf(stdout, "%s\n", buffer); + continue; + } + if (onespan.start_ts >= 999.0) {break;} // Always strip 999.0 end marker and stop + + if (onespan.eventnum == KUTRACE_PC_U) { + PossiblyReplaceName(&onespan, allmaps); + } + +#if 1 + // Name has trailing punctuation, including ], + fprintf(stdout, "[%12.8f, %10.8f, %d, %d, %d, %d, %d, %d, %d, %s\n", + onespan.start_ts, onespan.duration, + onespan.cpu, onespan.pid, onespan.rpcid, onespan.eventnum, + onespan.arg, onespan.retval, onespan.ipc, onespan.name.c_str()); + ++output_events; +#endif + } + + // Add marker and closing at the end + FinalJson(stdout); + fprintf(stderr, "spantopcnameu: %d events\n", output_events); + + return 0; +} diff --git a/book-user-code/schedtest.cc b/book-user-code/schedtest.cc new file mode 100644 index 000000000000..77269df511a1 --- /dev/null +++ b/book-user-code/schedtest.cc @@ -0,0 +1,148 @@ +// schedtest.cc +// Little program to observe scheduler choices +// Copyright 2021 Richard L. 
Sites +// +// compile with g++ -O2 -pthread schedtest.cc kutrace_lib.cc -o schedtest + +#include +#include +#include +#include +#include +#include + +#include "basetypes.h" +#include "kutrace_lib.h" + +// From Jenkins hash +#define mix(a,b,c) \ +{ \ + a -= b; a -= c; a ^= (c>>13); \ + b -= c; b -= a; b ^= (a<<8); \ + c -= a; c -= b; c ^= (b>>13); \ + a -= b; a -= c; a ^= (c>>12); \ + b -= c; b -= a; b ^= (a<<16); \ + c -= a; c -= b; c ^= (b>>5); \ + a -= b; a -= c; a ^= (c>>3); \ + b -= c; b -= a; b ^= (a<<10); \ + c -= a; c -= b; c ^= (b>>15); \ +} + + +enum SchedType { + CFS = 0, + FIFO, + RR +}; + +/* Count is chosen to run main loop about 1 second */ +static const int kLOOPCOUNT = 8000; + +/* Size is chosen to fit into a little less thsan 256KB */ +static const int kSIZE = 64 * 960; /* 4-byte words */ + +/* Calculate a hash over s, some multiple of 12 bytes long */ +/* Length is count of 32-bit words */ +uint32 hash(uint32* s, uint32 length, uint32 initval) { + uint32 a,b,c,len; + + /* Set up the internal state */ + len = length; + a = b = 0x9e3779b9; /* the golden ratio; an arbitrary value */ + c = initval; /* the previous hash value */ + + /*---------------------------------------- handle most of the string */ + while (len >= 3) + { + a += s[0] ; + b += s[1]; + c += s[2]; + mix(a,b,c); + s += 3; len -= 3; + } + /*-------------------------------------------- report the result */ + return c; +} + + +/* Do some work for about a second */ +/* Return hashval to make it live; caller ignores */ +void* CalcLoop(void* unused_arg) { + //fprintf(stdout, " CalcLoop(%d)\n", *(int*)unused_arg); + + /* Simple arbitrary initialization */ + uint32 foo[kSIZE]; /* A little less than 256KB */ + for (int i = 0; i < kSIZE; ++i) {foo[i] = (i & 1023) * 1041667;} + + /* Main loop */ + volatile uint32 hashval = 0; + for (int i = 0; i < kLOOPCOUNT; ++i) { + hashval = hash(foo, kSIZE, hashval); + } + + return NULL; +} + + +void DoParallel(int n, SchedType schedtype) { + 
kutrace::mark_d(n); + //fprintf(stdout, "DoParallel(%d)\n", n); + pthread_t* thread_id = (pthread_t*)malloc(n * sizeof(pthread_t)); + /* Spawn n threads */ + for (int i = 0; i < n; ++i) { + pthread_attr_t attr; + struct sched_param sparam; + sparam.sched_priority = 1; + pthread_attr_init(&attr); + /* Defaults to CFS, called SCHED_OTHER */ + if (schedtype == FIFO) { + pthread_attr_setschedpolicy(&attr, SCHED_FIFO); + pthread_attr_setschedparam(&attr, &sparam); + pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); + } + if (schedtype == RR) { + pthread_attr_setschedpolicy(&attr, SCHED_RR); + pthread_attr_setschedparam(&attr, &sparam); + pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); + } + + int iret = pthread_create(&thread_id[i], NULL, CalcLoop, &i); + if (iret != 0) {fprintf(stderr, "pthread_create() error %d\n", iret);} + + pthread_attr_destroy(&attr); + } + + /* Wait for all n threads to finish */ + for (int i = 0; i < n; ++i) { + pthread_join(thread_id[i], NULL); + //fprintf(stdout, " ret[%d]\n", i); + } + + free(thread_id); + //fprintf(stdout, "\n"); + +}; + +void Usage() { + fprintf(stderr, "Usage: schedtest [-cfs(d) | -fifo | -rr]\n"); + exit(EXIT_FAILURE); +} + + +// Spawn different numbers of parallel threads +int main(int argc, const char** argv) { + SchedType schedtype = CFS; // default + for (int i = 1; i < argc; ++i) { + if (strcmp(argv[i], "-cfs") == 0) {schedtype = CFS;} + if (strcmp(argv[i], "-fifo") == 0) {schedtype = FIFO;} + if (strcmp(argv[i], "-rr") == 0) {schedtype = RR;} + } + + // Spawn 1..12 parallel processes + for (int n = 1; n <= 12; ++n) { + DoParallel(n, schedtype); + } + + exit(EXIT_SUCCESS); +} + diff --git a/book-user-code/server4.cc b/book-user-code/server4.cc new file mode 100644 index 000000000000..b314afa3f913 --- /dev/null +++ b/book-user-code/server4.cc @@ -0,0 +1,446 @@ +// server4.cc cloned from server2.cc 2018.04.16 +// Copyright 2021 Richard L. 
Sites +// +// compile with g++ -O2 -pthread server4.cc dclab_log.cc dclab_rpc.cc kutrace_lib.cc spinlock.cc -o server4 + +#include +#include +#include +#include +#include +#include +#include // read() +#include +#include /* superset of previous */ +#include +#include +#include + +#include +#include + +#include "basetypes.h" +#include "dclab_log.h" +#include "dclab_rpc.h" +#include "kutrace_lib.h" +#include "spinlock.h" +#include "timecounters.h" + +using std::map; +using std::string; + + +typedef map MapKeyValue; + +typedef struct { + LockAndHist lockandhist; + const char* logfilename; + FILE* logfile; + MapKeyValue key_value; +} SharedData; + +typedef struct { + uint16 portnum; + SharedData* shareddata; +} PerThreadData; + +static const int kMaxRunTimeSeconds = 4 * 60; + +// Global flags +static bool verbose = false; +static bool verbose_data = false; +static bool stopping = false; // Any thread can set this true + +int OpenSocket(int16 portnum) { + // Open a TCP/IPv4 socket. + // Returns file descriptor if OK, -1 and sets errno if bad + //fprintf(stderr, "server4: Open server socket\n"); + int sockfd = socket(AF_INET, SOCK_STREAM, 0); + if (sockfd < 0) {Error("Socket open");} + + // Bind this socket to a particular TCP/IP port. 
+ // Construct server address structure first + struct sockaddr_in server_addr; + memset(&server_addr, 0, sizeof(server_addr)); + server_addr.sin_family = AF_INET; // TCP/IPv4 + server_addr.sin_addr.s_addr = INADDR_ANY; // Find out my IP address + server_addr.sin_port = htons(portnum); // host-to-network of short int portnum + + //fprintf(stderr, "server4: Bind server socket, port %d\n", portnum); + int iretbind = bind(sockfd, + reinterpret_cast(&server_addr), + sizeof(server_addr)); + if (iretbind != 0) {Error("Bind socket");} + //fprintf(stderr, "server4: Bound server socket %08x:%04x\n", server_addr.sin_addr.s_addr, server_addr.sin_port); + + return sockfd; +} + +// Accept a client connection and return the new socket fd +int ConnectToClient(int sockfd, uint32* client_ip, uint16* client_port) { + // Listen on the bound port for a connection attempt. + // Allow the default maximum 5 simultaneous attempts (four of which would wait) + //fprintf(stderr, "server4: listen server socket\n"); + int iretlisten = listen(sockfd, 5); + if (iretlisten != 0) {Error("listen");} + + // Accept an incoming connection + // Reserve client address structure first + // This blocks indefinitely, until a connection is tried from some client + struct sockaddr_in client_addr; + memset(&client_addr, 0, sizeof(client_addr)); + + socklen_t clientlen = sizeof(client_addr); // This will get changed to actual client len by accept + int acceptsockfd = accept(sockfd, reinterpret_cast(&client_addr), &clientlen); + if (acceptsockfd < 0) {Error("accept");} + const sockaddr_in* sin = reinterpret_cast(&client_addr); + *client_ip = ntohl(sin->sin_addr.s_addr); + *client_port = ntohs(sin->sin_port); + + // We have a connection from some client machine + fprintf(stderr, "server4: connection from %s\n", FormatIpPort(*client_ip, *client_port)); + return acceptsockfd; +} + +bool ReceiveRequest(int sockfd, RPC* req) { + return ReadOneRPC(sockfd, req, NULL); +} + +bool SendResponse(int sockfd, RPC* resp) { 
// printf-style formatting into a std::string.
// Returns the complete formatted text, or an empty string if vsnprintf()
// reports an encoding error.
// Results shorter than 256 bytes are built in a stack buffer; longer ones are
// formatted a second time straight into a string of the exact size.
std::string StringPrintf(const char* format, ...) {
  char buffer[256];
  va_list args;
  va_list args_again;		// A va_list can only be walked once; keep a copy
  va_start(args, format);
  va_copy(args_again, args);
  int n = vsnprintf(buffer, sizeof(buffer), format, args);
  va_end(args);

  if (n < 0) {
    va_end(args_again);
    return std::string("");
  }
  if (static_cast<size_t>(n) < sizeof(buffer)) {
    va_end(args_again);
    return std::string(buffer, n);
  }

  // vsnprintf returns the length the full output WOULD have had, so n can
  // exceed what buffer holds. Building a string from n bytes of buffer would
  // read past its end.
  std::string result(static_cast<size_t>(n) + 1, '\0');	// +1 for vsnprintf's NUL
  vsnprintf(&result[0], result.size(), format, args_again);
  va_end(args_again);
  result.resize(n);
  return result;
}
sp(&shareddata->lockandhist); + MapKeyValue::iterator it = shareddata->key_value.find(key); + if (it == shareddata->key_value.end()) { + response->header->status = FailStatus; // Let tthe caller know key wasn't there + } else { + shareddata->key_value.erase(it); + } + } + return true; +} + + +// Return a string of the 32 spinlock-usec histogram values +bool DoStats(SharedData* shareddata, const RPC* request, RPC* response) { + string result; + { + SpinLock sp(&shareddata->lockandhist); + result.append(StringPrintf("Stats: ")); + for (int i = 0; i < 32; ++i) { + result.append(StringPrintf("%d ", shareddata->lockandhist.hist[i])); + } + PutStringRPC(result, response); + } + return true; +} + + +// Erase all pairs +// No result data +bool DoReset(SharedData* shareddata, const RPC* request, RPC* response) { + { + SpinLock sp(&shareddata->lockandhist); + shareddata->key_value.clear(); + } + return true; +} + + +// Create the repsonse to a quit request +// Returns false on any error, with response status indicating why +bool DoQuit(SharedData* shareddata, const RPC* request, RPC* response) { + return true; +} + +// Create the repsonse showing an erroneous request +// Returns false on any error, with response status indicating why +bool DoError(SharedData* shareddata, const RPC* request, RPC* response) { + // We just send the data back unchanged + CopyRPCData(request, response); + response->header->status = FailStatus; + return false; +} + +// The working-on-RPC events KUTRACE_RPCIDREQ and KUTRACE_RPCIDRESP have this +// format: +// +-------------------+-----------+---------------+-------+-------+ +// | timestamp 2 | event | lglen8 | RPCid | (2) +// +-------------------+-----------+---------------+-------+-------+ +// 20 12 8 8 16 + +// Open a TCP/IP socket, bind it to given port, then listen, etc. 
+// Outer loop: listen, accept, read/write until closed +// Inner loop: read() until socket is closed or Quit +// Returns true when Quit message is received +void* SocketLoop(void* arg) { + PerThreadData* perthreaddata = reinterpret_cast(arg); + SharedData* shareddata = perthreaddata->shareddata; + int sockfd = OpenSocket(perthreaddata->portnum); + + // Outer loop: listen, accept, read/write*, close connection + for (;;) { + if (stopping) {break;} + + bool ok = true; + uint32 client_ip; + uint16 client_port; + int acceptsockfd = ConnectToClient(sockfd, &client_ip, &client_port); + + int optval = 1; + setsockopt(acceptsockfd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(int)); + setsockopt(acceptsockfd, SOL_SOCKET, SO_REUSEPORT, &optval, sizeof(int)); + + // Inner loop: read/process/write RPCs + for (;;) { + if (stopping) {break;} + + RPC request; + RPC response; + + ok &= ReceiveRequest(acceptsockfd, &request); + if (!ok) {break;} // Most likely, client dropped the connection + + // Process request + request.header->req_rcv_timestamp = GetUsec(); // T2 + request.header->client_ip = client_ip; + request.header->client_port = client_port; + request.header->type = ReqRcvType; + + // Trace the incoming RPC request + // RPCid is pseudo-random 32 bits, but never zero. If low 16 bits are zero, use high 16 bits. 
+ uint32 tempid = rpcid32_to_rpcid16(request.header->rpcid); + uint8 lglen8 = request.header->lglen1; // Request length + + // Upon new request, do these trace entries: + // 1) Method name w/rpcid + // 2) RPC REQ w/rpcid and lglen8 + // 3) Hash of first 32 bytes of request msg, to match a packet hash recorded within the kernel + + // rls 2020.08.23 record the method name for each incoming RPC + kutrace::addname(KUTRACE_METHODNAME, tempid, request.header->method); + + // Start tracing the incoming RPC request + // We also pack in the 16-bit hash over the first 32 bytes of the packet payload + kutrace::addevent(KUTRACE_RPCIDREQ, (lglen8 << 16) | tempid); + + if (verbose) { + fprintf(stdout, "server4: ReceiveRequest: "); + PrintRPC(stdout, &request); + } + LogRPC(shareddata->logfile, &request); + + if (verbose_data) { + // Send method, key, value to stdout + const uint8* req_data = request.data; + const uint8* req_data_limit = request.data + request.datalen; + fprintf(stdout, "%s ", request.header->method); + if (req_data < req_data_limit) { + string key = GetStringArg(&req_data); + fprintf(stdout, "%s ", key.c_str()); + } + if (req_data < req_data_limit) { + string value = GetStringArg(&req_data); + fprintf(stdout, "%s ", value.c_str()); + } + fprintf(stdout, "\n"); + } + + + // Create response + CopyRPCHeader(&request, &response); + response.data = NULL; + response.datalen = 0; + RPCHeader* hdr = response.header; + hdr->type = RespSendType; + hdr->status = SuccessStatus; + + // Insert a marker for serving this method request + kutrace::mark_a(hdr->method); + + // Do the request + if (strcmp(hdr->method, "ping") == 0) {ok &= DoPing(shareddata, &request, &response);} + else if (strcmp(hdr->method, "read") == 0) {ok &= DoRead(shareddata, &request, &response);} + else if (strcmp(hdr->method, "write") == 0) {ok &= DoWrite(shareddata, &request, &response);} + else if (strcmp(hdr->method, "sink") == 0) {ok &= DoSink(shareddata, &request, &response);} + else if 
(strcmp(hdr->method, "delete") == 0) {ok &= DoDelete(shareddata, &request, &response);} + else if (strcmp(hdr->method, "stats") == 0) {ok &= DoStats(shareddata, &request, &response);} + else if (strcmp(hdr->method, "reset") == 0) {ok &= DoReset(shareddata, &request, &response);} + else if (strcmp(hdr->method, "quit") == 0) { + ok &= DoQuit(shareddata, &request, &response); + stopping = true; + } + else {ok &= DoError(shareddata, &request, &response);} + + // Stop tracing the RPC request + kutrace::addevent(KUTRACE_RPCIDREQ, 0); + + + // Prepare response + lglen8 = TenLg(response.datalen); + hdr->lglen2 = lglen8; // Response length + hdr->resp_send_timestamp = GetUsec(); // T3 + hdr->type = RespSendType; + + // Start tracing response + kutrace::addevent(KUTRACE_RPCIDRESP, (lglen8 << 16) | tempid); + + if (verbose) {fprintf(stdout, "server4: SendResponse: "); PrintRPC(stdout, &response);} + LogRPC(shareddata->logfile, &response); + + // Send response + ok &= SendResponse(acceptsockfd, &response); + + FreeRPC(&request); + FreeRPC(&response); + + // Stop tracing the outgoing RPC response + kutrace::addevent(KUTRACE_RPCIDRESP, 0); + + if (!ok) {break;} // Most likely, client dropped the connection + } + + // Connection was closed -- go back around and wait for another connection + close(acceptsockfd); + } + + close(sockfd); + return NULL; +} + +void Usage() { + fprintf(stderr, "Usage: server4 portnumber [num_ports] [-verbose] [-data]\n"); + exit(EXIT_FAILURE); +} + + +// Just call our little server loop +int main (int argc, const char** argv) { + int base_port = -1; + int num_ports = -1; + for (int i = 1; i < argc; ++i) { + if (strcmp(argv[i], "-verbose") == 0) {verbose = true;} + if (strcmp(argv[i], "-data") == 0) {verbose_data = true;} + else if (argv[i][0] != '-') { + // We have a number + if (base_port < 0) {base_port = atoi(argv[i]);} + else if (num_ports < 0) {num_ports = atoi(argv[i]);} + } + } + // Apply defaults + if (base_port < 0) {base_port = 12345;} + if 
(num_ports < 0) {num_ports = 4;} + + CalibrateCycleCounter(); + + // Set up the shared data + SharedData shareddata; + memset(&shareddata, 0, sizeof(SharedData)); + shareddata.key_value.clear(); + shareddata.logfilename = MakeLogFileName(argv[0]); + shareddata.logfile = OpenLogFileOrDie(shareddata.logfilename); + + // Set up N ports, N in [1..4] + // This leaks memory for the small number of PerThreadData structures, but + // all we do next is terminate + fprintf(stderr, "\n"); + for (int n = 0; n < num_ports; ++n) { + // Allocate a per-thread data structure and fill it in + PerThreadData* perthreaddata = new PerThreadData; + perthreaddata->portnum = base_port + n; + perthreaddata->shareddata = &shareddata; + + // Launch a pthread to listen on that port + // Create independent threads each of which will execute function SocketLoop + pthread_t thread; + fprintf(stderr, "server4: launching a thread to listen on port %d\n", perthreaddata->portnum); + int iret = pthread_create( &thread, NULL, SocketLoop, (void*) perthreaddata); + if (iret != 0) {Error("pthread_create()", iret);} + } + + int total_seconds = 0; + while (!stopping) { + sleep(2); // Poll every 2 seconds + total_seconds += 2; + if (total_seconds >= kMaxRunTimeSeconds) { + fprintf(stderr, + "server4: timed out after %d minutes (safety move) ...\n", + kMaxRunTimeSeconds / 60); + stopping = true; + } + if (stopping) {break;} + } + + fclose(shareddata.logfile); + fprintf(stderr, "%s written\n", shareddata.logfilename); + + exit(EXIT_SUCCESS); +} + diff --git a/book-user-code/server_disk.cc b/book-user-code/server_disk.cc new file mode 100644 index 000000000000..924f36be281d --- /dev/null +++ b/book-user-code/server_disk.cc @@ -0,0 +1,727 @@ +// server_disk.cc cloned from server4.cc 2018.07.08 +// Serve dummy database from disk not RAM +// Uses /tmp/keyvaluestore which will be created if not there +// Copyright 2021 Richard L. 
// Rotate the 32-bit value x left by n bit positions; n is taken modulo 32.
// Both shift counts are masked into the range 0..31, because a plain
// `x >> (32 - n)` would shift by the full width of the type when n == 0,
// which is undefined behavior.
// Should generate a single rotate instruction when available
static inline uint32_t rotl32(uint32_t x, uint32_t n) {
  n &= 31;
  return (x << n) | (x >> ((32 - n) & 31));
}
Open a TCP/IPv4 socket. + // Returns file descriptor if OK, -1 and sets errno if bad + //fprintf(stderr, "server_disk: Open server socket\n"); + int sockfd = socket(AF_INET, SOCK_STREAM, 0); + if (sockfd < 0) {Error("Socket open");} + + // Bind this socket to a particular TCP/IP port. + // Construct server address structure first + struct sockaddr_in server_addr; + memset(&server_addr, 0, sizeof(server_addr)); + server_addr.sin_family = AF_INET; // TCP/IPv4 + server_addr.sin_addr.s_addr = INADDR_ANY; // Find out my IP address + server_addr.sin_port = htons(portnum); // host-to-network of short int portnum + + //fprintf(stderr, "server_disk: Bind server socket, port %d\n", portnum); + int iretbind = bind(sockfd, + reinterpret_cast(&server_addr), + sizeof(server_addr)); + if (iretbind != 0) {Error("Bind socket");} + //fprintf(stderr, "server_disk: Bound server socket %08x:%04x\n", server_addr.sin_addr.s_addr, server_addr.sin_port); + + return sockfd; +} + +// Accept a client connection and return the new socket fd +int ConnectToClient(int sockfd, uint32* client_ip, uint16* client_port) { + // Listen on the bound port for a connection attempt. 
+ // Allow the default maximum 5 simultaneous attempts (four of which would wait)A + //fprintf(stderr, "server_disk: listen server socket\n"); + int iretlisten = listen(sockfd, 5); + if (iretlisten != 0) {Error("listen");} + + // Accept an incoming connection + // Reserve client address structure first + // This blocks indefinitely, until a conneciton is tried from some client + struct sockaddr_in client_addr; + memset(&client_addr, 0, sizeof(client_addr)); + + socklen_t clientlen = sizeof(client_addr); // This will get changed to actual client len by accept + int acceptsockfd = accept(sockfd, reinterpret_cast(&client_addr), &clientlen); + if (acceptsockfd < 0) {Error("accept");} + const sockaddr_in* sin = reinterpret_cast(&client_addr); + *client_ip = ntohl(sin->sin_addr.s_addr); + *client_port = ntohs(sin->sin_port); + + // We have a connection from some client machine + fprintf(stderr, "server_disk: connection from %s\n", FormatIpPort(*client_ip, *client_port)); + return acceptsockfd; +} + +bool ReceiveRequest(int sockfd, RPC* req) { + return ReadOneRPC(sockfd, req, NULL); +} + +bool SendResponse(int sockfd, RPC* resp) { + SendOneRPC(sockfd, resp, NULL); + return true; +} + +string StringPrintf(const char* format, ...) 
// Do a simple checksum of a NUL-terminated string, returning a short string.
//   s          the text to checksum; its length is found with strlen()
//   chksumbuf  caller's output buffer of at least 16 bytes; it is zero-filled
//              and then receives 8 lowercase hex digits plus a NUL
// Returns chksumbuf.
// The string is consumed four bytes at a time in host byte order. For each
// group the running sum is rotated left by 3 bits and the group is added; a
// final group of 1..3 bytes is zero-padded to four bytes.
char* Checksum(const char* s, char* chksumbuf) {
  uint32_t sum = 0;
  size_t len = strlen(s);
  size_t whole = len & ~(size_t)3;	// Bytes covered by complete 4-byte groups

  // Every complete group, not just the first quarter of the string
  for (size_t i = 0; i < whole; i += 4) {
    uint32_t word;
    memcpy(&word, &s[i], sizeof(word));	// s need not be 4-byte aligned
    sum = ((sum << 3) | (sum >> 29)) + word;
  }
  // Tail end if any
  if (len & 3) {
    uint32_t word = 0;
    memcpy(&word, &s[whole], len & 3);
    sum = ((sum << 3) | (sum >> 29)) + word;
  }
  // Now make a short character string output
  memset(chksumbuf, 0, 16);
  snprintf(chksumbuf, 16, "%08x", (unsigned int)sum);
  return chksumbuf;
}
0; + int fd = open(fname.c_str(), O_RDONLY | O_NOATIME | O_DIRECT | O_SYNC); + if (fd < 0) { + perror("DirectRead open"); + return false; + } + // Read the value + *n = read(fd, buffer, maxsize); + close(fd); + // If n is max buffersize, then assume value is too long so fail + if ((*n < 0) || (*n == maxsize)) { + perror("DirectRead read"); + return false; + } + return true; +} + + +bool BufferedWrite(string fname, const uint8* buffer, int size, int* n) { + *n = 0; + FILE* f = fopen(fname.c_str(), "wb"); + if (f == NULL) { + return false; + } + // Write the value + *n = fwrite(buffer, 1, size, f); + fclose(f); + if (*n != size) { + return false; + } + return true; +} + +// int fd = open(filename, O_WRONLY | O_CREAT | O_DIRECT | O_SYNC, S_IRWXU); +// if (fd < 0) {perror("server_disk write open"); return;} + + + +// Note: O_DIRECT must transfer multiples of 4KB into aligned buffer +bool DirectWrite(string fname, const uint8* buffer, int size, int* n) { + *n = 0; + int fd = open(fname.c_str(), O_WRONLY | O_CREAT | O_DIRECT | O_SYNC, S_IRWXU); + if (fd < 0) { + perror("DirectWrite open"); + return false; + } + // Read the value + *n = write(fd, buffer, size); + close(fd); + // If n is max buffersize, then assume value is too long so fail + if (*n != size) { + perror("DirectWrite write"); + return false; + } + return true; +} + + + +// Read from request argument +// Set result data to +// Return status=fail and empty data if key is not found +bool DoRead(SharedData* shareddata, const RPC* request, RPC* response) { + const uint8* req_data = request->data; + string key = GetStringArg(&req_data); + string value; + { + SpinLock sp(&shareddata->lockandhist); + // Open key on disk, if any. 
read/close + string fname = shareddata->directory + "/" + key; + // fprintf(stdout, " \"%s\"\n", fname.c_str()); + + uint8* rawptr; + uint8* ptr = AllocPageAligned(kMaxValueSize, &rawptr); + bool ok; + int n; + if (direct) { + ok = DirectRead(fname, ptr, kMaxValueSize, &n); + } else { + ok = BufferedRead(fname, ptr, kMaxValueSize, &n); + } + if (!ok) { + response->header->status = FailStatus; // Let the caller know key wasn't there + free(rawptr); + return true; + } + PutStringRPC(string((char*)ptr, n), response); + free(rawptr); + + WaitMsec(wait_msec); + } + + return true; +} + +// Read but then return just a simple checksum of the value +bool DoChksum(SharedData* shareddata, const RPC* request, RPC* response) { + const uint8* req_data = request->data; + string key = GetStringArg(&req_data); + string value; + { + SpinLock sp(&shareddata->lockandhist); + // Open key on disk, if any. read/close + string fname = shareddata->directory + "/" + key; + + uint8* rawptr; + uint8* ptr = AllocPageAligned(kMaxValueSize, &rawptr); + bool ok; + int n; + if (direct) { + ok = DirectRead(fname, ptr, kMaxValueSize, &n); + } else { + ok = BufferedRead(fname, ptr, kMaxValueSize, &n); + } + if (!ok) { + response->header->status = FailStatus; // Let the caller know key wasn't there + free(rawptr); + return true; + } + + char chksumbuf[16]; // Only 9 used + PutStringRPC(string(Checksum((char*)ptr, chksumbuf), 8), response); + free(rawptr); + + WaitMsec(wait_msec); + } + + return true; +} + +// Write from request arguments +// No result data +bool DoWrite(SharedData* shareddata, const RPC* request, RPC* response) { + const uint8* req_data = request->data; + string key = GetStringArg(&req_data); + string value = GetStringArg(&req_data); + { + SpinLock sp(&shareddata->lockandhist); + string fname = shareddata->directory + "/" + key; + // fprintf(stdout, " \"%s\"\n", fname.c_str()); + + // Check limited length + if (kMaxValueSize <= value.size()) { + response->header->status = FailStatus; 
// Let the caller know value was too long + return true; + } + + bool ok; + int n; + if (direct) { + // Direct has to be multiple of 4KB and aligned. Round down to 4KB + int valuesize4k = value.size() & ~kPageSizeMask; + uint8* rawptr; + uint8* ptr = AllocPageAligned(valuesize4k, &rawptr); + memcpy(ptr, value.data(), valuesize4k); + ok = DirectWrite(fname, ptr, valuesize4k, &n); + free(rawptr); + } else { + ok = BufferedWrite(fname, (const uint8*)value.data(), value.size(), &n); + } + if (!ok) { + response->header->status = FailStatus; // Let the caller know key wasn't there + return true; + } + + WaitMsec(wait_msec); + } + + return true; +} + + +// Delete from request argument +// No result data +// Return status=fail if key is not found +bool DoDelete(SharedData* shareddata, const RPC* request, RPC* response) { + const uint8* req_data = request->data; + string key = GetStringArg(&req_data); + { + SpinLock sp(&shareddata->lockandhist); + // Open key on disk to get there/notthere + // Delete + string fname = shareddata->directory + "/" + key; + FILE* f = fopen(fname.c_str(), "r"); + if (f == NULL) { + response->header->status = FailStatus; // Let the caller know key open failed + return true; + } + fclose(f); + int n = remove(fname.c_str()); + // Check for error on remove() + if (n != 0) { + response->header->status = FailStatus; // Let the caller know remove failed + return true; + } + + WaitMsec(wait_msec); + } + + return true; +} + + +// Return a string of the 32 spinlock-usec histogram values +bool DoStats(SharedData* shareddata, const RPC* request, RPC* response) { + string result; + { + SpinLock sp(&shareddata->lockandhist); + result.append(StringPrintf("Stats: ")); + for (int i = 0; i < 32; ++i) { + result.append(StringPrintf("%d ", shareddata->lockandhist.hist[i])); + } + PutStringRPC(result, response); + + WaitMsec(wait_msec); + } + + return true; +} + + +//==== +//==== + +// Helper for deleting all files in a directory +// Return 0 on sucess +int fn(const 
char *fpath, const struct stat *sb, + int typeflag, struct FTW *ftwbuf) { + // fprintf(stdout, " fn(%s)\n", fpath); + // Do not delete the top-level directory itself + if (ftwbuf->level == 0) {return 0;} + // Check if stat() call failed on fpath + if (typeflag == FTW_NS) {return 1;} + int n = remove(fpath); + // Check if remove() failed + if (n != 0) {return 1;} + return 0; +} + + +// Erase all pairs +// No result data +bool DoReset(SharedData* shareddata, const RPC* request, RPC* response) { + { + SpinLock sp(&shareddata->lockandhist); + // delete all + int errors = nftw(shareddata->directory.c_str(), fn, 2, FTW_DEPTH); + if (errors != 0) { + response->header->status = FailStatus; // Let the caller know delete failed + } + + WaitMsec(wait_msec); + } + + return true; +} + + +// Create the repsonse to a quit request +// Returns false on any error, with response status indicating why +bool DoQuit(SharedData* shareddata, const RPC* request, RPC* response) { + return true; +} + +// Create the repsonse showing an erroneous request +// Returns false on any error, with response status indicating why +bool DoError(SharedData* shareddata, const RPC* request, RPC* response) { + // We just send the data back unchanged + CopyRPCData(request, response); + response->header->status = FailStatus; + return false; +} + + +// Open a TCP/IP socket, bind it to given port, then listen, etc. 
+// Outer loop: listen, accept, read/write until closed +// Inner loop: read() until socket is closed or Quit +// Returns true when Quit message is received +void* SocketLoop(void* arg) { + PerThreadData* perthreaddata = reinterpret_cast(arg); + SharedData* shareddata = perthreaddata->shareddata; + int sockfd = OpenSocket(perthreaddata->portnum); + + // Outer loop: listen, accept, read/write*, close connection + for (;;) { + if (stopping) {break;} + + bool ok = true; + uint32 client_ip; + uint16 client_port; + int acceptsockfd = ConnectToClient(sockfd, &client_ip, &client_port); + + int optval = 1; + setsockopt(acceptsockfd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(int)); + setsockopt(acceptsockfd, SOL_SOCKET, SO_REUSEPORT, &optval, sizeof(int)); + + // Inner loop: read/process/write RPCs + for (;;) { + if (stopping) {break;} + + RPC request; + RPC response; + + ok &= ReceiveRequest(acceptsockfd, &request); + if (!ok) {break;} // Most likely, client dropped the connection + + // Process request + request.header->req_rcv_timestamp = GetUsec(); + request.header->client_ip = client_ip; + request.header->client_port = client_port; + request.header->type = ReqRcvType; + + // Trace the incoming RPC request + // RPCid is pseudo-random 32 bits, but never zero. + // If low 16 bits are zero, use high 16 bits. 
+ uint32 tempid; + tempid = request.header->rpcid & 0xffff; + if(tempid == 0) {tempid = (request.header->rpcid >> 16) & 0xffff;} + kutrace::addevent(KUTRACE_RPCIDREQ, tempid); + + LogRPC(shareddata->logfile, &request); + if (verbose) { + fprintf(stdout, "server_disk: ReceiveRequest: "); + PrintRPC(stdout, &request); + } + if (verbose_data) { + // Send method, key, value to stdout + const uint8* req_data = request.data; + const uint8* req_data_limit = request.data + request.datalen; + fprintf(stdout, "%s ", request.header->method); + if (req_data < req_data_limit) { + string key = GetStringArg(&req_data); + fprintf(stdout, "%s ", key.c_str()); + } + if (req_data < req_data_limit) { + string value = GetStringArg(&req_data); + fprintf(stdout, "%s ", value.c_str()); + } + fprintf(stdout, "\n"); + } + + + // Process response + CopyRPCHeader(&request, &response); + response.data = NULL; + response.datalen = 0; + RPCHeader* hdr = response.header; + hdr->type = RespSendType; + hdr->status = SuccessStatus; + kutrace::mark_a(hdr->method); + + if (strcmp(hdr->method, "ping") == 0) { + ok &= DoPing(shareddata, &request, &response); + } else if (strcmp(hdr->method, "read") == 0) { + ok &= DoRead(shareddata, &request, &response); + } else if (strcmp(hdr->method, "chksum") == 0) { + ok &= DoChksum(shareddata, &request, &response); + } else if (strcmp(hdr->method, "write") == 0) { + ok &= DoWrite(shareddata, &request, &response); + } else if (strcmp(hdr->method, "delete") == 0) { + ok &= DoDelete(shareddata, &request, &response); + } else if (strcmp(hdr->method, "stats") == 0) { + ok &= DoStats(shareddata, &request, &response); + } else if (strcmp(hdr->method, "reset") == 0) { + ok &= DoReset(shareddata, &request, &response); + } else if (strcmp(hdr->method, "quit") == 0) { + ok &= DoQuit(shareddata, &request, &response); + stopping = true; + } else { + ok &= DoError(shareddata, &request, &response); + } + + // Stop tracing the incoming RPC request + 
kutrace::addevent(KUTRACE_RPCIDREQ, 0); + + + // Send response + hdr->lglen2 = TenLg(response.datalen); + hdr->resp_send_timestamp = GetUsec(); + hdr->type = RespSendType; + LogRPC(shareddata->logfile, &response); + if (verbose) { + fprintf(stdout, "server_disk: SendResponse: "); + PrintRPC(stdout, &response); + } + + // Trace the outgoing RPC response + // RPCid is pseudo-random 32 bits, but never zero. + // If low 16 bits are zero, send high 16 bits. + tempid = response.header->rpcid & 0xffff; + if(tempid == 0) {tempid = (response.header->rpcid >> 16) & 0xffff;} + kutrace::addevent(KUTRACE_RPCIDRESP, tempid); + + ok &= SendResponse(acceptsockfd, &response); + + FreeRPC(&request); + FreeRPC(&response); + + // Stop tracing the outgoing RPC response + kutrace::addevent(KUTRACE_RPCIDRESP, 0); + + if (!ok) {break;} // Most likely, client dropped the connection + } + + // Connection was closed -- go back around and wait for another connection + close(acceptsockfd); + } + + close(sockfd); + return NULL; +} + +void Usage() { + fprintf(stderr, "Usage: server_disk directory_name [portnumber] [num_ports] [-verbose] [-direct] [-data] [-wait msec]\n"); + exit(EXIT_FAILURE); +} + + +// Just call our little server loop +int main (int argc, const char** argv) { + int base_port = -1; + int num_ports = -1; + if (argc < 2) {Usage();} + const char* directory_name = argv[1]; + + for (int i = 2; i < argc; ++i) { + if (strcmp(argv[i], "-verbose") == 0) { + verbose = true; + } else if (strcmp(argv[i], "-direct") == 0) { + direct = true; + } else if (strcmp(argv[i], "-data") == 0) { + verbose_data = true; + } else if ((strcmp(argv[i], "-wait") == 0) && (i < (argc - 1))) { + wait_msec = atoi(argv[i + 1]); + ++i; + } else if (argv[i][0] != '-') { + // We have a number + if (base_port < 0) {base_port = atoi(argv[i]);} + else if (num_ports < 0) {num_ports = atoi(argv[i]);} + } else { + Usage(); + } + } + // Apply defaults + if (base_port < 0) {base_port = 12345;} + if (num_ports < 0) 
{num_ports = 8;} + + CalibrateCycleCounter(); + + // Set up the shared data + SharedData shareddata; + memset(&shareddata, 0, sizeof(SharedData)); + shareddata.directory = string(directory_name); + shareddata.logfilename = MakeLogFileName(argv[0]); + shareddata.logfile = OpenLogFileOrDie(shareddata.logfilename); + shareddata.key_value.clear(); + + // Create our "database" directory if not already there + struct stat st = {0}; + if (stat(directory_name, &st) == -1) { + fprintf(stderr, "Creating directory %s\n", directory_name); + mkdir(directory_name, 0700); + } + + // Set up N ports, N in [1..8] + // This leaks memory for the small number of PerThreadData structures, but + // all we do next is terminate + fprintf(stderr, "\n"); + for (int n = 0; n < num_ports; ++n) { + // Allocate a per-thread data structure and fill it in + PerThreadData* perthreaddata = new PerThreadData; + perthreaddata->portnum = base_port + n; + perthreaddata->shareddata = &shareddata; + + // Launch a pthread to listen on that port + // Create independent threads each of which will execute function SocketLoop + pthread_t thread; + fprintf(stderr, "server_disk: launching a thread to listen on port %d\n", + perthreaddata->portnum); + int iret = pthread_create( &thread, NULL, SocketLoop, (void*) perthreaddata); + if (iret != 0) {Error("pthread_create()", iret);} + } + + int total_seconds = 0; + while (!stopping) { + sleep(2); // Poll every 2 seconds + total_seconds += 2; + if (total_seconds >= kMaxRunTimeSeconds) { + fprintf(stderr, + "server_disk: timed out after %d minutes (safety move) ...\n", + kMaxRunTimeSeconds / 60); + stopping = true; + } + if (stopping) {break;} + } + + // Do not clear. User must explicitly send reset command + //// Clear the database files on exit. 
Ignore errors + //// nftw(shareddata.directory.c_str(), fn, 2, FTW_DEPTH); + + fclose(shareddata.logfile); + fprintf(stderr, " %s written\n", shareddata.logfilename); + + exit(EXIT_SUCCESS); +} + diff --git a/book-user-code/server_mystery21.cc b/book-user-code/server_mystery21.cc new file mode 100644 index 000000000000..8f9b4c4f7f2a --- /dev/null +++ b/book-user-code/server_mystery21.cc @@ -0,0 +1,876 @@ +// server_mystery21.cc cloned from server4.cc 2018.07.08 +// Serve dummy database from disk not RAM +// Uses /tmp/keyvaluestore which will be created if not there +// Copyright 2021 Richard L. Sites +// +// dick sites 2019.09.27 Add fake one-entry "cache" for timing. No way to invalidate it. +// dick sites 2020.02.09 Change to WeirdChecksum to deliberately vary execution time +// +// compile with +// g++ -O2 -pthread server_mystery21.cc dclab_log.cc dclab_rpc.cc kutrace_lib.cc spinlock_fixed.cc -o server_mystery21 + +#include +#include // For nftw file tree walk +#include +#include +#include +#include +#include +#include // for nanosleep +#include // read(), stat, mkdir +#include +#include /* superset of previous */ +#include +#include +#include // For stat/mkdir +#include // For stat/mkdir, others + +// For -direct disk read/write +#include +#include +#include + +#include +#include + +#include "basetypes.h" +#include "dclab_log.h" +#include "dclab_rpc.h" +#include "kutrace_lib.h" +#include "polynomial.h" +#include "spinlock.h" +#include "timecounters.h" + +using std::map; +using std::string; + + +// Should generate a single rotate instruction when available +inline uint32_t rotl32 (uint32_t x, uint32_t n) +{ + return (x << n) | (x >> (32-n)); +} + +typedef map MapKeyValue; + +typedef struct { + LockAndHist lockandhist; + string directory; // String for easy concatenate + const char* logfilename; + FILE* logfile; + MapKeyValue key_value; +} SharedData; + +typedef struct { + string cached_key; + string cached_value; + uint16 portnum; + SharedData* shareddata; +} 
PerThreadData; + +static const int kMaxRunTimeSeconds = 4 * 60; + +static const char* kDirectoryName = "/tmp/keyvaluestore"; + +static const int kPageSize = 4096; // Must be a power of two +static const int kPageSizeMask = kPageSize - 1; + +// Must be a multiple of 4KB +static const int kMaxValueSize = 1025 * 1024; // 1MB + 1KB extra + + +// Global flags +static bool direct = false; // If true, read/write O_DIRECT O_SYNC +static bool verbose = false; +static bool verbose_data = false; +static bool stopping = false; // Any thread can set this true +static int wait_msec = 0; // Extra time to hold lock, for extra interference + +// Wait n msec +void WaitMsec(int msec) { + if (msec == 0) {return;} + struct timespec req; + req.tv_sec = msec / 1000; + req.tv_nsec = (msec % 1000) * 1000000; + nanosleep(&req, NULL); +} + + +int OpenSocket(int16 portnum) { + // Open a TCP/IPv4 socket. + // Returns file descriptor if OK, -1 and sets errno if bad + //fprintf(stderr, "server_mystery21: Open server socket\n"); + int sockfd = socket(AF_INET, SOCK_STREAM, 0); + if (sockfd < 0) {Error("Socket open");} + + // Bind this socket to a particular TCP/IP port. 
+ // Construct server address structure first + struct sockaddr_in server_addr; + memset(&server_addr, 0, sizeof(server_addr)); + server_addr.sin_family = AF_INET; // TCP/IPv4 + server_addr.sin_addr.s_addr = INADDR_ANY; // Find out my IP address + server_addr.sin_port = htons(portnum); // host-to-network of short int portnum + + //fprintf(stderr, "server_mystery21: Bind server socket, port %d\n", portnum); + int iretbind = bind(sockfd, + reinterpret_cast(&server_addr), + sizeof(server_addr)); + if (iretbind != 0) {Error("Bind socket");} + //fprintf(stderr, "server_mystery21: Bound server socket %08x:%04x\n", server_addr.sin_addr.s_addr, server_addr.sin_port); + + return sockfd; +} + +// Accept a client connection and return the new socket fd +int ConnectToClient(int sockfd, uint32* client_ip, uint16* client_port) { + // Listen on the bound port for a connection attempt. + // Allow the default maximum 5 simultaneous attempts (four of which would wait)A + //fprintf(stderr, "server_mystery21: listen server socket\n"); + int iretlisten = listen(sockfd, 5); + if (iretlisten != 0) {Error("listen");} + + // Accept an incoming connection + // Reserve client address structure first + // This blocks indefinitely, until a conneciton is tried from some client + struct sockaddr_in client_addr; + memset(&client_addr, 0, sizeof(client_addr)); + + socklen_t clientlen = sizeof(client_addr); // This will get changed to actual client len by accept + int acceptsockfd = accept(sockfd, reinterpret_cast(&client_addr), &clientlen); + if (acceptsockfd < 0) {Error("accept");} + const sockaddr_in* sin = reinterpret_cast(&client_addr); + *client_ip = ntohl(sin->sin_addr.s_addr); + *client_port = ntohs(sin->sin_port); + + // We have a connection from some client machine + fprintf(stderr, "server_mystery21: connection from %s\n", FormatIpPort(*client_ip, *client_port)); + return acceptsockfd; +} + +bool ReceiveRequest(int sockfd, RPC* req) { + return ReadOneRPC(sockfd, req, NULL); +} + +bool 
SendResponse(int sockfd, RPC* resp) { + SendOneRPC(sockfd, resp, NULL); + return true; +} + +string StringPrintf(const char* format, ...) { + va_list args; + char buffer[256]; + va_start(args, format); + int n = vsnprintf(buffer, 256, format, args); + va_end(args); + if (n < 0) {return string("");} + return string(buffer, n); +} + +// Create the repsonse to a ping request +// Returns false on any error, with response status indicating why +bool DoPing(SharedData* shareddata, const RPC* request, RPC* response) { + // We just send the data back unchanged + CopyRPCData(request, response); + return true; +} + + +// Do a simple checksum of a string, returning a short string +char* Checksum(const char* s, char* chksumbuf) { + uint32 sum = 0; + int len = strlen(s); + for (int i = 0; i < (len >> 2); i += 4) { + sum = rotl32(sum, 3) + *reinterpret_cast(&s[i]); + } + // Tail end if any + if (len & 3) { + char temp[4]; + memset(temp, 0, 4); + memcpy(temp, &s[len & ~3], len & 3); + sum = rotl32(sum, 3) + *reinterpret_cast(temp); + } + // Now make a short character string output + memset(chksumbuf, 0, 16); + sprintf(chksumbuf, "%08x", sum); + // fprintf(stderr, "Checksum(%d) = '%s'\n", len, chksumbuf); + return chksumbuf; +} + +static const uint8* cryptkey = (const uint8*)"prettygoodkeyphrase"; + +// Do a simple RC4 decryption of a string, returning a short string +char* DecryptingChecksum(const char* s, char* chksumbuf) { + int keylength = strlen((const char*)cryptkey); + int i, j; + // Set up key schedule + uint8 perm[256]; + for (i = 0; i < 256; ++i) {perm[i] = i;} + j = 0; + for (i = 0; i < 256; ++i) { + j = (j + perm[i] + cryptkey[i % keylength]) & 255; + uint8 temp = perm[i]; perm[i] = perm[j]; perm[j] = temp; + } + + // Generate output and xor with s + i = 0; + j = 0; + uint32 sum = 0; + int len = strlen(s); + for (int n = 0; n < len; ++n) { + i = (i + 1) & 255; + j = (j + perm[i]) & 255; + uint8 temp = perm[i]; perm[i] = perm[j]; perm[j] = temp; + uint8 k = 
perm[(perm[i] + perm[j]) & 255]; + sum += (s[i] ^ k); + } + + // Now make a short character string output + memset(chksumbuf, 0, 16); + sprintf(chksumbuf, "%08x", sum); + // fprintf(stderr, "Checksum(%d) = '%s'\n", len, chksumbuf); + return chksumbuf; +} + +// Pseudo-random value +//uint32 polyx = POLYINIT32; +uint32 polyx = 1234567890; + +// True always +bool SomeComplexBusinessLogic(uint32 polyx) { + return true; +} + +// True 1 time out of 64 +bool OtherBusinessLogic(uint32 polyx) { + return (polyx & 63) == 0; +} + +// True 1 time out of 5 +bool WrongBusinessLogic(uint32 polyx) { + return (polyx % 5) == 0; +} + +// A checksum routine that deliberately varies how long it takes from call to call +char* WeirdChecksum(const char* s, char* chksumbuf) { + char* retval = NULL; + if (SomeComplexBusinessLogic(polyx)) { + // The case we are testing + if (OtherBusinessLogic(polyx)) { + // 1 of 64, slow processing + kutrace::mark_b("decryp"); + for (int n = 0; n < 10; ++n) {retval = DecryptingChecksum(s, chksumbuf);} + } else { + // 63 of 64, normal processing + kutrace::mark_b("chk"); + for (int n = 0; n < 10; ++n) {retval = Checksum(s, chksumbuf);} + } + if (WrongBusinessLogic(polyx)) { + // 1 of 5, medium processing + kutrace::mark_b("chk"); + for (int n = 0; n < 10; ++n) {retval = Checksum(s, chksumbuf);} + kutrace::mark_b("chk"); + for (int n = 0; n < 10; ++n) {retval = Checksum(s, chksumbuf);} + } + } else { + // .. 
other cases that never happen + retval = Checksum(s, chksumbuf); + } + + // Update our pseudo-random business logic + polyx = POLYSHIFT32(polyx); + polyx = POLYSHIFT32(polyx); + return retval; +} + +// Allocate a byte array of given size, aligned on a page boundary +// Caller will call free(rawptr) +uint8* AllocPageAligned(int bytesize, uint8** rawptr) { + int newsize = bytesize + kPageSizeMask; + *rawptr = reinterpret_cast(malloc(newsize)); + uintptr_t temp = reinterpret_cast(*rawptr); + uintptr_t temp2 = (temp + kPageSizeMask) & ~kPageSizeMask; + return reinterpret_cast(temp2); +} + + + +bool BufferedRead(string fname, uint8* buffer, int maxsize, int* n) { + *n = 0; + FILE* f = fopen(fname.c_str(), "rb"); + if (f == NULL) { + return false; + } + // Read the value + *n = fread(buffer, 1, maxsize, f); + fclose(f); + // If n is max buffersize, then assume value is too long so fail + if (*n == maxsize) { + return false; + } + return true; +} + +// Note: O_DIRECT must transfer multiples of 4KB into aligned buffer +bool DirectRead(string fname, uint8* buffer, int maxsize, int* n) { + *n = 0; + int fd = open(fname.c_str(), O_RDONLY | O_NOATIME | O_DIRECT | O_SYNC); + if (fd < 0) { + perror("DirectRead open"); + return false; + } + // Read the value + *n = read(fd, buffer, maxsize); + close(fd); + // If n is max buffersize, then assume value is too long so fail + if ((*n < 0) || (*n == maxsize)) { + perror("DirectRead read"); + return false; + } + return true; +} + + +bool BufferedWrite(string fname, const uint8* buffer, int size, int* n) { + *n = 0; + FILE* f = fopen(fname.c_str(), "wb"); + if (f == NULL) { + return false; + } + // Write the value + *n = fwrite(buffer, 1, size, f); + fclose(f); + if (*n != size) { + return false; + } + return true; +} + +// int fd = open(filename, O_WRONLY | O_CREAT | O_DIRECT | O_SYNC, S_IRWXU); +// if (fd < 0) {perror("server_mystery21 write open"); return;} + + + +// Note: O_DIRECT must transfer multiples of 4KB into aligned buffer 
+// Write size bytes from buffer to the file fname with O_DIRECT | O_SYNC,
+// creating the file if necessary and truncating any previous contents so that
+// a shorter value never leaves stale bytes from an earlier, longer one
+// (BufferedWrite already truncates via fopen "wb").
+// Sets *n to the byte count returned by write(); *n stays 0 if open() fails.
+// Returns true only when exactly size bytes were written.
+bool DirectWrite(string fname, const uint8* buffer, int size, int* n) {
+  *n = 0;
+  int fd = open(fname.c_str(),
+                O_WRONLY | O_CREAT | O_TRUNC | O_DIRECT | O_SYNC, S_IRWXU);
+  if (fd < 0) {
+    perror("DirectWrite open");
+    return false;
+  }
+  // Write the value
+  *n = write(fd, buffer, size);
+  if (*n != size) {
+    // Failed or short write. Report it before close() can overwrite errno.
+    perror("DirectWrite write");
+    close(fd);
+    return false;
+  }
+  close(fd);
+  return true;
+}
+
+
+
+// Read the value stored under the key given in the request.
+// On success the response data is that value.
+// If the key's file cannot be read, response status is set to FailStatus and
+// no data is returned.
+// Always returns true; failure is reported only through the response status.
+bool DoRead(PerThreadData* perthreaddata,
+            SharedData* shareddata, const RPC* request, RPC* response) {
+  const uint8* req_data = request->data;
+  string key = GetStringArg(&req_data);
+
+  // Dummy one-entry "cache". There is no mechanism to invalidate old data after a write
+  if (perthreaddata->cached_key == key) {
+    // Hit
+    ////fprintf(stdout, "%s hit\n", key.c_str());
+    PutStringRPC(perthreaddata->cached_value, response);
+    return true;
+  }
+
+  {
+    // NOTE(review): presumably sp holds the lock until the end of this scope -- confirm in spinlock.h
+    SpinLock sp(&shareddata->lockandhist);
+    // Open key on disk, if any. read/close
+    // NOTE(review): key arrives in the RPC and is used unchecked as a file name,
+    // so a key such as "../x" names a file outside the directory -- confirm whether
+    // callers are trusted.
+    string fname = shareddata->directory + "/" + key;
+    // fprintf(stdout, " \"%s\"\n", fname.c_str());
+
+    uint8* rawptr;
+    uint8* ptr = AllocPageAligned(kMaxValueSize, &rawptr);
+    if (rawptr == NULL) {
+      // malloc failed inside AllocPageAligned; there is no buffer to read into
+      response->header->status = FailStatus;
+      return true;
+    }
+    bool ok;
+    int n;
+    kutrace::mark_b("disk");
+    if (direct) {
+      ok = DirectRead(fname, ptr, kMaxValueSize, &n);
+    } else {
+      ok = BufferedRead(fname, ptr, kMaxValueSize, &n);
+    }
+    kutrace::mark_b("/disk");
+    if (!ok) {
+      response->header->status = FailStatus;  // Let the caller know key wasn't there
+      free(rawptr);
+      return true;
+    }
+    // Cache the pair
+    perthreaddata->cached_key = key;
+    perthreaddata->cached_value = string((char*)ptr, n);
+
+    PutStringRPC(perthreaddata->cached_value, response);
+    free(rawptr);
+
+    WaitMsec(wait_msec);
+  }
+
+  return true;
+}
+
+// Read the value for the key given in the request, but return just an
+// 8-character hex checksum of the value instead of the value itself.
+// A cache hit uses WeirdChecksum (deliberately variable execution time);
+// a cache miss reads the file and uses the plain Checksum.
+// If the key's file cannot be read, response status is set to FailStatus.
+// Always returns true; failure is reported only through the response status.
+bool DoChksum(PerThreadData* perthreaddata, SharedData* shareddata, const RPC* request, RPC* response) {
+  const uint8* req_data = request->data;
+  string key = GetStringArg(&req_data);
+
+  // Dummy one-entry "cache". There is no mechanism to invalidate old data after a write
+  if (perthreaddata->cached_key == key) {
+    // Hit
+    ////fprintf(stdout, "%s hit\n", key.c_str());
+    char chksumbuf[16];  // Only 9 used
+    PutStringRPC(string(WeirdChecksum(perthreaddata->cached_value.c_str(), chksumbuf), 8), response);
+    WaitMsec(wait_msec);
+    return true;
+  }
+
+  {
+    // NOTE(review): presumably sp holds the lock until the end of this scope -- confirm in spinlock.h
+    SpinLock sp(&shareddata->lockandhist);
+    // Open key on disk, if any. read/close
+    string fname = shareddata->directory + "/" + key;
+
+    uint8* rawptr;
+    uint8* ptr = AllocPageAligned(kMaxValueSize, &rawptr);
+    if (rawptr == NULL) {
+      // malloc failed inside AllocPageAligned; there is no buffer to read into
+      response->header->status = FailStatus;
+      return true;
+    }
+    bool ok;
+    int n;
+    kutrace::mark_b("disk");
+    if (direct) {
+      ok = DirectRead(fname, ptr, kMaxValueSize, &n);
+    } else {
+      ok = BufferedRead(fname, ptr, kMaxValueSize, &n);
+    }
+    kutrace::mark_b("/disk");
+    if (!ok) {
+      response->header->status = FailStatus;  // Let the caller know key wasn't there
+      free(rawptr);
+      return true;
+    }
+    // Cache the pair
+    perthreaddata->cached_key = key;
+    perthreaddata->cached_value = string((char*)ptr, n);
+    free(rawptr);
+
+    // Checksum() runs strlen() on its argument. The raw read buffer holds just
+    // the n bytes read and is not NUL-terminated, so checksum the cached copy,
+    // whose c_str() is terminated after exactly n bytes.
+    char chksumbuf[16];  // Only 9 used
+    PutStringRPC(string(Checksum(perthreaddata->cached_value.c_str(), chksumbuf), 8), response);
+
+    WaitMsec(wait_msec);
+  }
+
+  return true;
+}
+
+// Write the value given in the request to the file named by the key.
+// No result data.
+// Response status is set to FailStatus if the value is too long or the write fails.
+// Always returns true; failure is reported only through the response status.
+// Note: the per-thread one-entry read caches are not updated here.
+bool DoWrite(SharedData* shareddata, const RPC* request, RPC* response) {
+  const uint8* req_data = request->data;
+  string key = GetStringArg(&req_data);
+  string value = GetStringArg(&req_data);
+  {
+    SpinLock sp(&shareddata->lockandhist);
+    // NOTE(review): key arrives in the RPC and is used unchecked as a file name,
+    // so a key such as "../x" names a file outside the directory -- confirm whether
+    // callers are trusted.
+    string fname = shareddata->directory + "/" + key;
+    // fprintf(stdout, " \"%s\"\n", fname.c_str());
+
+    // Check limited length
+    if (kMaxValueSize <= value.size()) {
+      response->header->status = FailStatus;  // Let the caller know value was too long
+      return true;
+    }
+
+    bool ok;
+    int n;
+    if (direct) {
+      // Direct has to be multiple of 4KB and aligned. Round down to 4KB
+      // (so the last value.size() % 4096 bytes are not written in this mode)
+      int valuesize4k = value.size() & ~kPageSizeMask;
+      uint8* rawptr;
+      uint8* ptr = AllocPageAligned(valuesize4k, &rawptr);
+      if (rawptr == NULL) {
+        // malloc failed inside AllocPageAligned; nothing to copy into
+        ok = false;
+      } else {
+        memcpy(ptr, value.data(), valuesize4k);
+        ok = DirectWrite(fname, ptr, valuesize4k, &n);
+        free(rawptr);
+      }
+    } else {
+      ok = BufferedWrite(fname, (const uint8*)value.data(), value.size(), &n);
+    }
+    if (!ok) {
+      response->header->status = FailStatus;  // Let the caller know the write failed
+      return true;
+    }
+
+    WaitMsec(wait_msec);
+  }
+
+  return true;
+}
+
+
+// Delete the file named by the key given in the request.
+// No result data.
+// Response status is set to FailStatus if the key's file cannot be opened
+// or cannot be removed.
+// Always returns true; failure is reported only through the response status.
+bool DoDelete(SharedData* shareddata, const RPC* request, RPC* response) {
+  const uint8* req_data = request->data;
+  string key = GetStringArg(&req_data);
+  {
+    SpinLock sp(&shareddata->lockandhist);
+    // Open key on disk to get there/notthere
+    // Delete
+    // NOTE(review): key arrives in the RPC and is used unchecked as a file name -- confirm
+    // whether callers are trusted.
+    string fname = shareddata->directory + "/" + key;
+    FILE* f = fopen(fname.c_str(), "r");
+    if (f == NULL) {
+      response->header->status = FailStatus;  // Let the caller know key open failed
+      return true;
+    }
+    fclose(f);
+    int n = remove(fname.c_str());
+    // Check for error on remove()
+    if (n != 0) {
+      response->header->status = FailStatus;  // Let the caller know remove failed
+      return true;
+    }
+
+    WaitMsec(wait_msec);
+  }
+
+  return true;
+}
+
+
+// Return a string of the 32 spinlock-usec histogram values
+// The string is built and sent while holding the lock. Always returns true.
+bool DoStats(SharedData* shareddata, const RPC* request, RPC* response) {
+  string result;
+  {
+    SpinLock sp(&shareddata->lockandhist);
+    result.append(StringPrintf("Lock acquire: "));
+    for (int i = 0; i < 32; ++i) {
+      result.append(StringPrintf("%d ", shareddata->lockandhist.hist[i]));
+      // Extra separator after every tenth value
+      if ((i % 10) == 9) {result.append(" ");}
+    }
+    PutStringRPC(result, response);
+
+    WaitMsec(wait_msec);
+  }
+
+  return true;
+}
+
+
+//====
+//====
+
+// Helper for deleting all files in a directory; passed to nftw() by DoReset
+// Return 0 on success, 1 if stat() or remove() failed for fpath
+int fn(const char *fpath, const struct stat *sb,
+       int typeflag, struct FTW *ftwbuf) {
+  // fprintf(stdout, " fn(%s)\n", fpath);
+  // Do not delete the top-level directory itself
+  if (ftwbuf->level == 0) {return 0;}
+  // Check if stat() call failed on fpath
+  if (typeflag == FTW_NS) {return 1;}
+  int n = remove(fpath);
+  // Check if remove() failed
+  if (n != 0) {return 1;}
+  return 0;
+}
+
+
+// Erase all pairs by removing everything below the database directory
+// No result data
+// Response status is set to FailStatus if the tree walk reports an error.
+// Always returns true.
+bool DoReset(SharedData* shareddata, const RPC* request, RPC* response) {
+  {
+    SpinLock sp(&shareddata->lockandhist);
+    // delete all
+    int errors = nftw(shareddata->directory.c_str(), fn, 2, FTW_DEPTH);
+    if (errors != 0) {
+      response->header->status = FailStatus;  // Let the caller know delete failed
+    }
+
+    WaitMsec(wait_msec);
+  }
+
+  return true;
+}
+
+
+// Create the response to a quit request
+// Nothing to do here; always returns true. The caller sets the stopping flag.
+bool DoQuit(SharedData* shareddata, const RPC* request, RPC* response) {
+  return true;
+}
+
+// Create the response showing an erroneous request
+// Echoes the request data, sets status to FailStatus, and always returns false
+bool DoError(SharedData* shareddata, const RPC* request, RPC* response) {
+  // We just send the data back unchanged
+  CopyRPCData(request, response);
+  response->header->status = FailStatus;
+  return false;
+}
+
+// Add a name of type n, value number, to the trace
+// name supplies bytelen bytes; at most 55 are used, so the 56-byte name area
+// always ends with at least one zero byte.
+void AddName(uint64 n, uint64 number, u64 bytelen, const char* name) {
+  u64 temp[8];  // Buffer for name entry
+  if (bytelen > 55) {bytelen = 55;}
+  // One header word plus enough 8-byte words to hold the name
+  u64 wordlen = 1 + ((bytelen + 7) / 8);
+  // Build the initial word
+  u64 n_with_length = n + (wordlen * 16);
+  //            T                 N                     ARG
+  temp[0] = (CLU(0) << 44) | (n_with_length << 32) | (number);
+  memset((char*)&temp[1], 0, 7 * sizeof(u64));
+  memcpy((char*)&temp[1], name, bytelen);
+  kutrace::DoControl(KUTRACE_CMD_INSERTN, (u64)&temp[0]);
+}
+
+// Open a TCP/IP socket, bind it to given port, then listen, etc.
+// Outer loop: listen, accept, read/write until closed +// Inner loop: read() until socket is closed or Quit +// Returns true when Quit message is received +void* SocketLoop(void* arg) { + PerThreadData* perthreaddata = reinterpret_cast(arg); + SharedData* shareddata = perthreaddata->shareddata; + int sockfd = OpenSocket(perthreaddata->portnum); + + // Outer loop: listen, accept, read/write*, close connection + for (;;) { + if (stopping) {break;} + + bool ok = true; + uint32 client_ip; + uint16 client_port; + int acceptsockfd = ConnectToClient(sockfd, &client_ip, &client_port); + + int optval = 1; + setsockopt(acceptsockfd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(int)); + setsockopt(acceptsockfd, SOL_SOCKET, SO_REUSEPORT, &optval, sizeof(int)); + + // Inner loop: read/process/write RPCs + for (;;) { + if (stopping) {break;} + + RPC request; + RPC response; + + ok &= ReceiveRequest(acceptsockfd, &request); + if (!ok) {break;} // Most likely, client dropped the connection + + // Process request + request.header->req_rcv_timestamp = GetUsec(); + request.header->client_ip = client_ip; + request.header->client_port = client_port; + request.header->type = ReqRcvType; + + // Trace the incoming RPC request + // RPCid is pseudo-random 32 bits, but never zero. + // If low 16 bits are zero, use high 16 bits. 
+ uint32 tempid; + tempid = request.header->rpcid & 0xffff; + if(tempid == 0) {tempid = (request.header->rpcid >> 16) & 0xffff;} + // Add method name to the trace + AddName(KUTRACE_METHODNAME, tempid, 8, request.header->method); + // Add RPC request to the trace + uint64 entry_count = kutrace::addevent(KUTRACE_RPCIDREQ, tempid); + + LogRPC(shareddata->logfile, &request); + if (verbose) { + fprintf(stdout, "server_mystery21: ReceiveRequest: "); + PrintRPC(stdout, &request); + } + if (verbose_data) { + // Send method, key, value to stdout + const uint8* req_data = request.data; + const uint8* req_data_limit = request.data + request.datalen; + fprintf(stdout, "%s ", request.header->method); + if (req_data < req_data_limit) { + string key = GetStringArg(&req_data); + fprintf(stdout, "%s ", key.c_str()); + } + if (req_data < req_data_limit) { + string value = GetStringArg(&req_data); + fprintf(stdout, "%s ", value.c_str()); + } + fprintf(stdout, "\n"); + } + + + // Process response + CopyRPCHeader(&request, &response); + response.data = NULL; + response.datalen = 0; + RPCHeader* hdr = response.header; + hdr->type = RespSendType; + hdr->status = SuccessStatus; + + kutrace::mark_a(hdr->method); + + if (strcmp(hdr->method, "ping") == 0) { + ok &= DoPing(shareddata, &request, &response); + } else if (strcmp(hdr->method, "read") == 0) { + ok &= DoRead(perthreaddata, shareddata, &request, &response); + } else if (strcmp(hdr->method, "chksum") == 0) { + ok &= DoChksum(perthreaddata, shareddata, &request, &response); + } else if (strcmp(hdr->method, "write") == 0) { + ok &= DoWrite(shareddata, &request, &response); + } else if (strcmp(hdr->method, "delete") == 0) { + ok &= DoDelete(shareddata, &request, &response); + } else if (strcmp(hdr->method, "stats") == 0) { + ok &= DoStats(shareddata, &request, &response); + } else if (strcmp(hdr->method, "reset") == 0) { + ok &= DoReset(shareddata, &request, &response); + } else if (strcmp(hdr->method, "quit") == 0) { + ok &= 
DoQuit(shareddata, &request, &response); + stopping = true; + } else { + ok &= DoError(shareddata, &request, &response); + } + + // Stop tracing the incoming RPC request + kutrace::addevent(KUTRACE_RPCIDREQ, 0); + + + // Send response + hdr->lglen2 = TenLg(response.datalen); + hdr->resp_send_timestamp = GetUsec(); + hdr->type = RespSendType; + LogRPC(shareddata->logfile, &response); + if (verbose) { + fprintf(stdout, "server_mystery21: SendResponse: "); + PrintRPC(stdout, &response); + } + + // Trace the outgoing RPC response + // RPCid is pseudo-random 32 bits, but never zero. + // If low 16 bits are zero, send high 16 bits. + tempid = response.header->rpcid & 0xffff; + if(tempid == 0) {tempid = (response.header->rpcid >> 16) & 0xffff;} + kutrace::addevent(KUTRACE_RPCIDRESP, tempid); + + ok &= SendResponse(acceptsockfd, &response); + + FreeRPC(&request); + FreeRPC(&response); + + // Stop tracing the outgoing RPC response + kutrace::addevent(KUTRACE_RPCIDRESP, 0); + + if (!ok) {break;} // Most likely, client dropped the connection + } + + // Connection was closed -- go back around and wait for another connection + close(acceptsockfd); + } + + close(sockfd); + return NULL; +} + +void Usage() { + fprintf(stderr, + "Usage: server_mystery21 directory " + "[portnumber] [num_ports] [-verbose] [-direct] [-data] [-wait msec]\n"); + exit(EXIT_FAILURE); +} + + +// Just call our little server loop +int main (int argc, const char** argv) { + if (argc < 2) {Usage();} + int base_port = -1; + int num_ports = -1; + const char* directory = argv[1]; +printf("directory = %s\n", directory); + + for (int i = 2; i < argc; ++i) { + if (strcmp(argv[i], "-verbose") == 0) { + verbose = true; + } else if (strcmp(argv[i], "-direct") == 0) { + direct = true; + } else if (strcmp(argv[i], "-data") == 0) { + verbose_data = true; + } else if ((strcmp(argv[i], "-wait") == 0) && (i < (argc - 1))) { + wait_msec = atoi(argv[i + 1]); + ++i; + } else if (argv[i][0] != '-') { + // We have a number + if 
(base_port < 0) {base_port = atoi(argv[i]);} + else if (num_ports < 0) {num_ports = atoi(argv[i]);} + } else { + Usage(); + } + } + // Apply defaults + if (base_port < 0) {base_port = 12345;} + if (num_ports < 0) {num_ports = 4;} + + CalibrateCycleCounter(); + + // Set up the shared data + SharedData shareddata; + ////memset(&shareddata, 0, sizeof(SharedData)); + shareddata.directory = string(directory); + shareddata.logfilename = MakeLogFileName(argv[0]); + shareddata.logfile = OpenLogFileOrDie(shareddata.logfilename); + shareddata.key_value.clear(); + + // Create our "database" directory if not already there + struct stat st = {0}; + if (stat(shareddata.directory.c_str(), &st) == -1) { + mkdir(shareddata.directory.c_str(), 0700); + } +printf("directory is %s\n", shareddata.directory.c_str()); + + + // Set up N ports, N in [1..4] + // This leaks memory for the small number of PerThreadData structures, but + // all we do next is terminate + fprintf(stderr, "\n"); + for (int n = 0; n < num_ports; ++n) { + // Allocate a per-thread data structure and fill it in + PerThreadData* perthreaddata = new PerThreadData; + perthreaddata->cached_key.clear(); // dummy cache + perthreaddata->cached_value.clear(); // dummy cache + perthreaddata->portnum = base_port + n; + perthreaddata->shareddata = &shareddata; + + // Launch a pthread to listen on that port + // Create independent threads each of which will execute function SocketLoop + pthread_t thread; + fprintf(stderr, "server_mystery21: launching a thread to listen on port %d\n", + perthreaddata->portnum); + int iret = pthread_create( &thread, NULL, SocketLoop, (void*) perthreaddata); + if (iret != 0) {Error("pthread_create()", iret);} + } + + int total_seconds = 0; + while (!stopping) { + sleep(2); // Poll every 2 seconds + total_seconds += 2; + if (total_seconds >= kMaxRunTimeSeconds) { + fprintf(stderr, + "server_mystery21: timed out after %d minutes (safety move) ...\n", + kMaxRunTimeSeconds / 60); + stopping = true; + } 
+ if (stopping) {break;} + } + + // Do not clear. User must explicitly send reset command + // Clear the database files on exit. Ignore errors + //// nftw(shareddata.directory.c_str(), fn, 2, FTW_DEPTH); + + fclose(shareddata.logfile); + fprintf(stderr, " %s written\n", shareddata.logfilename); + + exit(EXIT_SUCCESS); +} + diff --git a/book-user-code/show_cpu.html b/book-user-code/show_cpu.html new file mode 100644 index 000000000000..2460ec26f2c8 --- /dev/null +++ b/book-user-code/show_cpu.html @@ -0,0 +1,8332 @@ + + + + + + + + + + + + + + + + + + + + + +       +Annot: + + + +   Option: + + + + + + + + + + + + +   Search: + + + + +usec: + + +.. + + + + + +
+ + + + + + +
+ + + + + + + + + + + + + + Shift-click 1-5 to save, click to restore. Axes: scroll wheel to zoom, + drag to pan. Items: shift-click-unclick to annotate. + Shift-click-unshift to keep, shift-drag to measure. + Red dot resets. + [more] + + +
+   +------------------------------------------------------------------------+
+   |            (0) UI controls (HTML)                                      |
+   +------------------------------------------------------------------------+
+   |            (1) Title                                                   |
+   +----+--------------------------------------------------------------+----+
+   |(2) |                                                              |(4) |
+   |Y-  |       (3) Main SVG drawing area                              |IPC |
+   |axis|                                                              |    |
+   |    |                                                              |    |
+   +----+--------------------------------------------------------------+----+
+   |            (5) X-axis                                                  |
+   +----+--------------------------------------------------------------+----+
+   |            (6) UI hint text (HTML)                                     |
+   +----+--------------------------------------------------------------+----+
+
+   List of UI active areas
+   In general, click toggles buttons while shift-click cycles through more choices
+   In general, shift-click-unclick to annotate/highlight; shift-click-unshift to 
+     keep multiple results onscreen at once
+
+   (0) Text and buttons
+     User annotate PID names	 off/on
+     All  annotate all items	 off/on
+
+     Mark mark_x events	 	 off/both/text/numbers
+     Arc  wakeup arcs		 off/on
+     Wait Reasons for not exec.	 off/on  Always on now
+     Slow CPU clock freq	 off/gradient
+
+     IPC  instructions per cycle off/both/user/kernel
+     Samp PC samples		 off/on
+     ovhd Approx.overhead times	 off/on  Always on now
+     Lock lock-held lines	 off/on
+     CB   color blind colors	 off/on
+
+     Search:    regex string match 
+     !	        invert search	 off/on, like grep -v
+     usec:      match only event duration in [lo..hi]
+  
+   (1) Title	text size	1.0/1.25/1.50/0.75 times default
+  
+   (2) Y-axis group triangles	collapse/expand/gray=one-fourth/gray=one-twentieth
+       Y-axis labels		mouse and wheel to pan/zoom vertically
+       Y-axis labels		shift-click:normal/highlight
+  
+   (3) anywhere 		mouse and wheel to pan/zoom horizontally	
+     Red dot 			resets display
+     Yellow overlay indicates slow CPU clock rate (power saving)
+    
+   (5) Basetime date		normal/relative to row start
+  
+   (6) Buttons
+     1 2 3 4 5			shift-click:save, click:restore current view
+     double-arrow		toggles between last two views
+     [more]			this text off/on
+
+International Morse Code (for wait events)
+  A    .-    B    -...  Cpu  -.-.  Disk -.. 
+  E    .     F    ..-.  G    --.   H    ....
+  I    ..    J    .---  tasK -.-   Lock .-..
+  Mem  --    Net  -.    O    ---   Pipe .--.
+  Q    --.-  R    .-.   Sche ...   Time -   
+  U    ..-   V    ...-  W    .--   X    -..-
+  Y    -.--  Z    --..
+
+
+ +
+ + + + + diff --git a/book-user-code/show_disk.html b/book-user-code/show_disk.html new file mode 100644 index 000000000000..71fb848f1546 --- /dev/null +++ b/book-user-code/show_disk.html @@ -0,0 +1,780 @@ + + + + + + + + + + + + + + + + + + + + +File: + + + +
+ +
+
+ + + + + + + diff --git a/book-user-code/show_rpc.html b/book-user-code/show_rpc.html new file mode 100644 index 000000000000..b30284dc6ba2 --- /dev/null +++ b/book-user-code/show_rpc.html @@ -0,0 +1,2801 @@ + + + + + + + + + + + + + + + + + + + + + +File: + + +color: + + + + + + + +search: + + + +usec: + +.. + + + +Matches: + +
+ + +
+ +
+
+ + + + + + + diff --git a/book-user-code/show_tcp.html b/book-user-code/show_tcp.html new file mode 100644 index 000000000000..eaaa81706b48 --- /dev/null +++ b/book-user-code/show_tcp.html @@ -0,0 +1,854 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + +File: + + + +
+ +
+
+ + + + + + + diff --git a/book-user-code/spantoprof.cc b/book-user-code/spantoprof.cc new file mode 100644 index 000000000000..fe23d83cf9b4 --- /dev/null +++ b/book-user-code/spantoprof.cc @@ -0,0 +1,1250 @@ +// Little program to convert span rows to profile per row +// +// Filter from stdin to stdout, producing row profile(d) or group profile JSON +// +// Copyright 2021 Richard L. Sites +// +// Compile with g++ -O2 spantoprof.cc -o spantoprof +// + +#include +#include +#include +#include // for pair + +#include +#include // exit +#include + +#include "basetypes.h" +#include "kutrace_lib.h" + + +using std::map; +using std::multimap; +using std::set; +using std::string; + +#define pid_idle 0 +#define event_idle (0x10000 + pid_idle) + +static const int SUMM_CPU = 0; +static const int SUMM_PID = 1; +static const int SUMM_RPC = 2; + +static const int SortByCpuNumber = 0; +static const int SortByBasenameDotElapsed = 1; +static const int SortByBasenameUnderscoreElapsed = 2; + +static const double kTEN_NSEC = 0.000000010; + +static const char* kPresorted = " \"presorted\""; // Note space + +// These label the group summary rows +static const char* kSuffix[32] = { + "_1us", "_2us", "_4us", "_8us", "_16us", "_32us", "_64us", "_125us", "_250us", "_500us", + "_1ms", "_2ms", "_4ms", "_8ms", "_16ms", "_32ms", "_64ms", "_125ms", "_256ms", "_512ms", + "_1s", "_2s", "_4s", "_8s", "_16s", "_32s", "_64s", "_128s", "_256s", "_512s", + "_1Ks", "_2Ks" +}; + +// These sort in descending order of lg(elapsed time) +static const char* kSortSuffix[32] = { + "_31", "_30", "_29", "_28", "_27", "_26", "_25", "_24", "_23", "_22", + "_21", "_20", "_19", "_18", "_17", "_16", "_15", "_14", "_13", "_12", + "_11", "_10", "_09", "_08", "_07", "_06", "_05", "_04", "_03", "_02", + "_01", "_00" +}; + +// Map granular IPC values 0..15 to midpoint multiple of 1/16 per range +// thus, 0..1/8 maps to 1/16 and 3.5..4 maps to 3.75 = 60/16 +static const double kIpcToLinear [16] = { + 1.0, 3.0, 5.0, 7.0, 9.0, 
11.0, 13.0, 15.0, + 18.0, 22.0, 26.0, 30.0, 36.0, 44.0, 52.0, 60.0 +}; + +// Going the other way, round down to 0..15 +static const int kLinearToIpc[64] = { + 0,0, 1,1, 2,2, 3,3, 4,4, 5,5, 6,6, 7,7, + 8,8,8,8, 9,9,9,9, 10,10,10,10, 11,11,11,11, + 12,12,12,12,12,12,12,12, 13,13,13,13,13,13,13,13, + 14,14,14,14,14,14,14,14, 15,15,15,15,15,15,15,15 +}; + +// Each JSON input record +typedef struct { + double start_ts; // Seconds + double duration; // Seconds + int cpu; + int pid; + int rpcid; + int eventnum; + int arg; + int retval; + int ipc; + string name; +} OneSpan; + +// This aggregates a number of identical events by name, summing their durations +// including a weights IPC sum +typedef struct { + double start_ts; + double duration; + double ipcsum; // Seconds * sixteenths of an IPC + int eventnum; + int arg; + string event_name; +} EventTotal; + +// These are all the events in a row +typedef map RowSummary; + +// We use this for sorting events in a row by start_ts +// There will be duplicates, so multimap +typedef multimap RowSummaryDP; + +///// We use this for sorting events in a row by some string criterion +//typedef multimap RowSummarySP; + +// This aggregates one or more identically-named rows within CPUs, PIDs, RPCs +// Each has a summary of the contained events for those rows +typedef struct { + double lo_ts; + double hi_ts; + int rownum; + int rowcount; // The number of rows merged together here + bool proper_row_name; + string row_name; + RowSummary rowsummary; +} RowTotal; + +// These are all the rows in a group (Cpu, Pid, Rpc) +// indexed by cpu/pid/rpc number +typedef map GroupSummary; + +// These are all the rows in a group (Cpu, Pid, Rpc) +// indexed by a name +typedef map GroupSummary2; + +// We use this for sorting rows in a group by some string criterion +typedef multimap GroupSummarySP; + + +// Top-level data structure +typedef struct { + // Profile of KUtrace spans across cpu,pid,rpc rows + // Level 1 totals across single rows + GroupSummary 
cpuprof; + GroupSummary pidprof; + GroupSummary rpcprof; + // Level 2 totals across similar-name rows within a group + GroupSummary2 cpuprof2; + GroupSummary2 pidprof2; + GroupSummary2 rpcprof2; +} Summary; + + +// Globals +static int span_count = 0; +static Summary summary; // Aggregates across the entire trace + +static bool dorow = true; // default to -row +static bool dogroup = false; +static bool doall = false; // if true, show even one-row merges +static bool verbose = false; + +static int output_events = 0; + + +void DumpSpan(FILE* f, const char* label, const OneSpan* span) { + fprintf(f, "%s <%12.8lf %10.8lf %d %d %d %d %d %d %d %s>\n", + label, span->start_ts, span->duration, span->cpu, + span->pid, span->rpcid, span->eventnum, span->arg, span->retval, span->ipc, span->name.c_str()); +} + +void DumpSpanShort(FILE* f, const OneSpan* span) { + fprintf(f, "<%12.8lf %10.8lf ... %s> ", span->start_ts, span->duration, span->name.c_str()); +} + +void DumpEvent(FILE* f, const char* label, const OneSpan& event) { + fprintf(f, "%s [%12.8lf %10.8lf %d %d %d %d %d %d %d %s]\n", + label, event.start_ts, event.duration, event.cpu, + event.pid, event.rpcid, event.eventnum, event.arg, event.retval, event.ipc, event.name.c_str()); +} + +void DumpOneEvent(FILE* f, const EventTotal& eventtotal) { + fprintf(f, " [%d] %12.8lf %10.8lf %10.8lf %s\n", + eventtotal.eventnum, eventtotal.start_ts, eventtotal.duration, eventtotal.ipcsum, eventtotal.event_name.c_str()); +} + +void DumpOneRow(FILE* f, const RowTotal& rowtotal) { + fprintf(f, " [%d] %12.8lf %10.8lf '%s'\n", + rowtotal.rownum, rowtotal.lo_ts, rowtotal.hi_ts, rowtotal.row_name.c_str()); + for (RowSummary::const_iterator it = rowtotal.rowsummary.begin(); + it != rowtotal.rowsummary.end(); + ++it) { + const EventTotal& eventtotal = it->second; + DumpOneEvent(f, eventtotal); + } +} + +void DumpRowSummary(FILE* f, const char* label, const GroupSummary& groupsummary) { + fprintf(f, "\n%s\n--------\n", label); + for 
// Format x as fixed-point with 8 fractional digits, right-justified in at
// least 12 columns ("%12.8lf"). Used to build sortable string keys.
// The buffer is large enough for any finite double (about 309 integer digits)
// and snprintf bounds the write regardless.
string DoubleToString(double x) {
  char temp[512];
  snprintf(temp, sizeof(temp), "%12.8lf", x);
  return string(temp);
}
the number of bits-1 needed to hold x +int FloorLg(uint64 x) { + int lg = 0; + uint64 local_x = x; + if (local_x & 0xffffffff00000000LL) {lg += 32; local_x >>= 32;} + if (local_x & 0xffff0000LL) {lg += 16; local_x >>= 16;} + if (local_x & 0xff00LL) {lg += 8; local_x >>= 8;} + if (local_x & 0xf0LL) {lg += 4; local_x >>= 4;} + if (local_x & 0xcLL) {lg += 2; local_x >>= 2;} + if (local_x & 0x2LL) {lg += 1; local_x >>= 1;} + return lg; +} + +// d is in seconds; we return lg of d in usec +// We multiply by 1024000 to make 1ms an exact power of 2. +// Buckets smaller than 125 usec are off by 2%,as are buckets > 512ms. +int DFloorLg(double d) { + if (d <= 0.0) return 0; + uint64 x = d * 1024000.0; + int retval = FloorLg(x); +//fprintf(stderr, " DFloorLg(%lf) = %d\n", d, retval); + return retval; +} + + +// (2) RPC point event +bool IsAnRpc(const OneSpan& event) { + return ((KUTRACE_RPCIDREQ <= event.eventnum) && (event.eventnum <= KUTRACE_RPCIDMID)); +} +bool IsAnRpcnum(int eventnum) { + return ((KUTRACE_RPCIDREQ <= eventnum) && (eventnum <= KUTRACE_RPCIDMID)); +} + +// (2) pc_sample point event +bool IsAPcSample(const OneSpan& event) { + return ((event.eventnum == KUTRACE_PC_U) || (event.eventnum == KUTRACE_PC_K) || (event.eventnum == KUTRACE_PC_TEMP)); +} +// (2) pc_sample event +bool IsAPcSamplenum(int eventnum) { + return ((eventnum == KUTRACE_PC_U) || (eventnum == KUTRACE_PC_K) || (eventnum == KUTRACE_PC_TEMP)); +} + +// (3) Lock event +bool IsALock(const OneSpan& event) { + return ((event.eventnum == KUTRACE_LOCK_HELD) || (event.eventnum == KUTRACE_LOCK_TRY)); +} +// (3) Lock event +bool IsALocknum(int eventnum) { + return ((eventnum == KUTRACE_LOCK_HELD) || (eventnum == KUTRACE_LOCK_TRY)); +} +bool IsALockTry(const OneSpan& event) { + return (event.eventnum == KUTRACE_LOCK_TRY); +} +bool IsALockTrynum(int eventnum) { + return (eventnum == KUTRACE_LOCK_TRY); +} +bool IsALockHeld(const OneSpan& event) { + return (event.eventnum == KUTRACE_LOCK_HELD); +} +bool 
// True when the event number falls in the user-mode-execution class, i.e.
// bits 16..19 of eventnum equal 0x1 (0x10000 .. 0x1ffff for 20-bit values).
// This includes the idle task; see IsUserExecNonidlenum for the variant
// that excludes it.
bool IsUserExecnum(int eventnum) {
  const int kEventClassMask = 0xF0000;
  const int kUserExecClass = 0x10000;
  const int event_class = eventnum & kEventClassMask;
  return event_class == kUserExecClass;
}
specials, marks, PCsamps + // Keep waits + if (IsAWaitnum(eventnum)) {return true;} + return false; +} + +// True if this item contributes to non-zero CPU duration or we otherwise want to roll it up +// We keep PC samples so we can make a sampled profile +// We keep frequencies so we can give the average clock rate for each row +bool IsCpuContrib(const OneSpan& event) { + if (event.duration < 0) {return false;} + // Execution: traps, interrupts, syscalls, idle, user-mode, c-exit + if (KUTRACE_TRAP <= event.eventnum) {return true;} + // We still have names, specials, marks, PCsamps + // Keep PCsamp and frequency + if (IsAPcSample(event)) {return true;} // PC sample overlay + if (IsAFreq(event)) {return true;} // Frequency overlay + return false; +} + +// True if this item contributes to non-zero CPU duration or we otherwise want to roll it up +// We ignore PID 0 +// We keep PC samples so we can make a sampled profile +// We keep frequencies so we can give the average clock rate for each row +bool IsPidContrib(const OneSpan& event) { + if (event.duration < 0) {return false;} + if (event.pid <= 0) {return false;} + // Execution: traps, interrupts, syscalls, idle, user-mode, c-exit + if (KUTRACE_TRAP <= event.eventnum) {return true;} + // We still have names, specials, marks, PCsamps + // Keep waits, PCsamp, frequency, locks + if (IsAWait(event)) {return true;} // PID Waiting + if (IsAPcSample(event)) {return true;} // PC sample overlay + if (IsAFreq(event)) {return true;} // Frequency overlay + if (IsALock(event)) {return true;} // Lock overlay + return false; +} + +// True if this item contributes to non-zero CPU duration or we otherwise want to roll it up +// We ignore RPC 0 +// We keep PC samples so we can make a sampled profile +// We keep frequencies so we can give the average clock rate for each row +bool IsRpcContrib(const OneSpan& event) { + if (event.duration < 0) {return false;} + if (event.rpcid <= 0) {return false;} + // Execution: traps, interrupts, 
syscalls, idle, user-mode, c-exit + if (KUTRACE_TRAP <= event.eventnum) {return true;} + // We still have names, specials, marks, PCsamps + // Keep waits, PCsamp, frequency, locks + if (IsAWait(event)) {return true;} // RPC Waiting + if (IsAPcSample(event)) {return true;} // PC sample overlay + if (IsAFreq(event)) {return true;} // Frequency overlay + if (IsALock(event)) {return true;} // Lock overlay + return false; +} + +// These have good PID row names +bool IsGoodPidName(const OneSpan& event) { + // if (event.duration < 0) {return false;} + if (KUTRACE_LEFTMARK == event.eventnum) {return true;} + return IsUserExec(event); +} + +// These have good RPC row names (method names) +bool IsGoodRpcName(const OneSpan& event) { + // if (event.duration < 0) {return false;} + if (event.rpcid == 0) {return false;} + return IsAnRpc(event); +} + + +double dmin(double a, double b) {return (a < b) ? a : b;} +double dmax(double a, double b) {return (a > b) ? a : b;} + + +// Event keys are event names +void MergeEventInRow(const EventTotal& eventtotal, RowSummary* aggpereventsummary) { + if (aggpereventsummary->find(eventtotal.event_name) == aggpereventsummary->end()) { + // Add new event + (*aggpereventsummary)[eventtotal.event_name] = eventtotal; + return; + } + + EventTotal* es = &(*aggpereventsummary)[eventtotal.event_name]; + // The real action + es->duration += eventtotal.duration; + es->ipcsum += eventtotal.ipcsum; +} + +bool CheckRowname(const char* label, const string& rowname) { + if(rowname.length() < 2) { + fprintf(stderr, "Bad rowname_%s %s\n", label, rowname.c_str()); + return false; + } + return true; +} + +// Merge rowtotal into groupaggregate[key], making a row as needed +void MergeOneRow(int rownum, const string& key, + const string& rowname, const RowTotal& rowtotal, + GroupSummary2* groupaggregate) { + if (groupaggregate->find(key) == groupaggregate->end()) { + // Add new row and name it + RowTotal temp; + temp.lo_ts = 0.0; + temp.hi_ts = 0.0; + temp.rownum = 
// Strip off the .123 or _2us at the end, if any: return the part of name
// before the LAST occurrence of delim.
// But do not match a leading delimiter, as in ./run_me -- a match at
// position 0 leaves the name unchanged, as does no match at all.
string Basename(const string& name, const char* delim) {
  // Keep the unsigned position type; narrowing to int would only compare
  // equal to npos by accident of integer conversion.
  const string::size_type delim_pos = name.rfind(delim);
  if ((delim_pos != string::npos) && (0 < delim_pos)) {
    return name.substr(0, delim_pos);
  }
  return name;
}
up to a period +void MergeGroupRows(const GroupSummary& groupsummary, GroupSummary2* groupaggregate) { + for (GroupSummary::const_iterator it = groupsummary.begin(); it != groupsummary.end(); ++it) { + const RowTotal* rowtotal = &it->second; + double row_duration = rowtotal->hi_ts - rowtotal->lo_ts; +//if (row_duration < 0.0) { +//fprintf(stderr, "Bad duration_row\n"); +//DumpOneRow(stderr, *rowtotal); +//} + int lg_row_duration = DFloorLg(row_duration); // lg of usec + if (23 < lg_row_duration) {lg_row_duration = 23;} // max bucket is [8 ...) seconds, 2**23 + + string row_basename = Basename(rowtotal->row_name, "."); + // If the basename is entirely digits, assume we have a CPU number. + // We want to average across all the CPUs, not 0_AVG, 1_AVG, ... + int non_digit = row_basename.find_first_not_of("0123456789 "); + bool is_cpu_number = (non_digit == string::npos); // Only digits/blank + + // We want to accumulate incoming rows with the same row_basename. + // We achive this by using the row name (including lg) as the key, ignoring the original + // CPU#, PID#, RPC# + string key_name = row_basename + kSortSuffix[lg_row_duration]; + string visible_name = row_basename + kSuffix[lg_row_duration]; + +//fprintf(stderr, "MergeGroupRows [%d] '%s' %s %s %8.6lf\n", +//rowtotal->rownum, rowtotal->row_name.c_str(), key_name.c_str(), visible_name.c_str(), row_duration); + + // Level 1 row summary + int row_basenum = rowtotal->rownum; + MergeOneRow(row_basenum, key_name, visible_name, *rowtotal, groupaggregate); + + // Level 2 group summary + // Offset row number from the first-order row numbers + if (is_cpu_number) { + MergeOneRow(row_basenum, "CPU_AVG", "CPU_AVG", *rowtotal, groupaggregate); + } else { + MergeOneRow(row_basenum, row_basename + "_AVG", + row_basename + "_AVG", *rowtotal, groupaggregate); + } + } + + // Now go back and divide all the aggregated durations by rowcount + for (GroupSummary2::iterator it = groupaggregate->begin(); it != groupaggregate->end(); 
++it) { + RowTotal* aggrowtotal = &it->second; + if (1 < aggrowtotal->rowcount) { + char temp[24]; + sprintf(temp, " (%d)", aggrowtotal->rowcount); + aggrowtotal->row_name += temp; + DivideByRowcount(aggrowtotal); + } + } +} + +// For CPU/PID/RPC summary rows, add together groups with the same name before any period, +// putting into power-of-two buckets by row duration lo_ts..hi_ts +// and also making one grand total (overall average per group). +void MergeRows(Summary* summ) { +//fprintf(stderr, "MergeRows\n"); + MergeGroupRows(summ->cpuprof, &summ->cpuprof2); + MergeGroupRows(summ->pidprof, &summ->pidprof2); + MergeGroupRows(summ->rpcprof, &summ->rpcprof2); +} + +void Prune2(GroupSummary2* groupsummary) { + // Go find all the basenames with basename_AVG rowcount greater than 1 + set keepset; + for (GroupSummary2::iterator it = groupsummary->begin(); it != groupsummary->end(); ++it) { + RowTotal* rowtotal = &it->second; + if ((1 < rowtotal->rowcount) && + (rowtotal->row_name.find("_AVG") != string::npos)) { + string basename = Basename(rowtotal->row_name, "_"); + keepset.insert(basename); + } + } + + // Now prune everything with rowcount = 1 that is not in keepset + for (GroupSummary2::iterator it = groupsummary->begin(); it != groupsummary->end(); ++it) { + RowTotal* rowtotal = &it->second; + if ((1 == rowtotal->rowcount) && + (keepset.find(Basename(rowtotal->row_name, "_")) == keepset.end())) { + rowtotal->rowcount = 0; + } + } +} + +// prune away all group rows that have rowcount=1, +// unless the basename_AVG count is more than 1 +void PruneGroups(Summary* summ) { + if (doall) {return;} + Prune2(&summ->cpuprof2); + Prune2(&summ->pidprof2); + Prune2(&summ->rpcprof2); +} + +// Input is a string integer; add one +void IncrString(string* s) { + int subscr = s->length() - 1; + while ((0 <= subscr) && ((*s)[subscr] >= '9')) { + (*s)[subscr--] = '0'; + } + if (0 <= subscr) {(*s)[subscr] += 1;} // Else wrap around +} + + +// This first sorts the row items into user, 
kernel, other, idle +// and within each group descending by duration +// +// In addition to a mian CPU-execution and wait-non-execution timeline, +// there are several overlays wiht separate timelines: +// frequency +// PC samples +// locks +void RewriteOneRow(RowTotal* rowtotal) { + // Step (1) Build side multimap by sort keys + RowSummaryDP sorted_row; + for (RowSummary::const_iterator it = rowtotal->rowsummary.begin(); + it != rowtotal->rowsummary.end(); + ++it) { + const EventTotal* eventtotal = &it->second; + // We use the key value to sort events. Full traces are limited to + // 1000 seconds, so we offset by that + // Negating the values gives us a descending sort instead of + double key = 0.0; + if (IsRowMarkernum(eventtotal->eventnum)) { + key = -2000.0; // Always first + } else if (IsUserExecNonidlenum(eventtotal->eventnum)) { + key = -1000.0 - eventtotal->duration; // User + } else if (IsKernelmodenum(eventtotal->eventnum)) { + key = -1000.0 - eventtotal->duration; // Kernel + } else if (IsAPcSamplenum(eventtotal->eventnum)) { // PC sample overlay + key = -1000.0 - eventtotal->duration; + } else if (IsALockHeldnum(eventtotal->eventnum)) { // Lock overlay + key = -1000.0 - eventtotal->duration; + } else if (!IsAnIdleCstatenum(eventtotal->eventnum)) { // Wait, freq/lock overlay + key = 0.0 - eventtotal->duration; + } else { + // Idle is last + key = 1000.0 - eventtotal->duration; // Idle + } + sorted_row.insert(std::pair(key, eventtotal)); +if (verbose){ +fprintf(stdout, "sorted_row[%12.8lf] =", key); +DumpOneEvent(stdout, *eventtotal); +} + } + + // Step (2) Rewrite the underlying map into sorted order, by building + // a second map and swapping + string temp_next = string("000000"); + RowSummary temp; + for (RowSummaryDP::iterator it = sorted_row.begin(); it != sorted_row.end(); ++it) { + const EventTotal* eventtotal = it->second; + temp[temp_next] = *eventtotal; + IncrString(&temp_next); + } + rowtotal->rowsummary.swap(temp); + + // Step (3) Rewrite the 
start times + // Three running totals from zero: freq, pcsamp, other (e.g. cpu/wait) + double cpu_prior_end_ts = 0.0; // Main timeline + double samp_prior_end_ts = 0.0; // Overlay + double freq_prior_end_ts = 0.0; // Overlay + double lock_prior_end_ts = 0.0; // Overlay + + for (RowSummary::iterator it = rowtotal->rowsummary.begin(); + it != rowtotal->rowsummary.end(); + ++it) { + EventTotal* eventtotal = &it->second; + if (IsAFreqnum(eventtotal->eventnum)) { + eventtotal->start_ts = freq_prior_end_ts; + freq_prior_end_ts = eventtotal->start_ts + eventtotal->duration; + } else if (IsAPcSamplenum(eventtotal->eventnum)) { + eventtotal->start_ts = samp_prior_end_ts; + samp_prior_end_ts = eventtotal->start_ts + eventtotal->duration; + } else if (IsALocknum(eventtotal->eventnum)) { + eventtotal->start_ts = lock_prior_end_ts; + lock_prior_end_ts = eventtotal->start_ts + eventtotal->duration; + } else { + eventtotal->start_ts = cpu_prior_end_ts; + cpu_prior_end_ts = eventtotal->start_ts + eventtotal->duration; + } + } + + // Track elapsed time just by the CPU/wait items, not PC/freq/lock + rowtotal->lo_ts = 0.0; + rowtotal->hi_ts = cpu_prior_end_ts; +if (verbose) { + fprintf(stderr, "Rewrite lo/hi_ts[%s] = %12.8f %12.8f\n", + rowtotal->row_name.c_str(), rowtotal->lo_ts, rowtotal->hi_ts); +} +} + +void RewritePerRowTimes(GroupSummary* groupsummary) { + for (GroupSummary::iterator it = groupsummary->begin(); it != groupsummary->end(); ++it) { + RowTotal* rowtotal = &it->second; + RewriteOneRow(rowtotal); + } +} + +void RewritePerRowTimes2(GroupSummary2* groupsummary) { + for (GroupSummary2::iterator it = groupsummary->begin(); it != groupsummary->end(); ++it) { + RowTotal* rowtotal = &it->second; + RewriteOneRow(rowtotal); + } +} + +// Input: All items in row start at 0.0 but have non-zero durations +// Output: All items in row have consecutive start times, based on sort order +// and row hi_ts is set to max of cpu,pid,and rpc total elapsed time +void 
RewriteStartTimes(Summary* summ) { +//fprintf(stderr, "RewriteStartTimes\n"); + RewritePerRowTimes(&summ->cpuprof); + RewritePerRowTimes(&summ->pidprof); + RewritePerRowTimes(&summ->rpcprof); + RewritePerRowTimes2(&summ->cpuprof2); + RewritePerRowTimes2(&summ->pidprof2); + RewritePerRowTimes2(&summ->rpcprof2); +} + +string GetKey(int sorttype, const RowTotal* rowtotal) { + string key; + double elapsed = rowtotal->hi_ts - rowtotal->lo_ts; + switch (sorttype) { + case SortByCpuNumber: + key = IntToString0000(rowtotal->rownum); + break; + case SortByBasenameDotElapsed: + key = Basename(rowtotal->row_name, ".") + DoubleToString(elapsed); + break; + case SortByBasenameUnderscoreElapsed: + ////key = Basename(rowtotal->row_name, "_") + IntToString0000(DFloorLg(elapsed)); + key = Basename(rowtotal->row_name, "_") + DoubleToString(elapsed); + break; + } + return key; +} + +void SortRows(int sorttype, GroupSummary* groupsummary) { + // Step (1) Build side multimap by sort keys + GroupSummarySP sorted_group; + for (GroupSummary::const_iterator it = groupsummary->begin(); it != groupsummary->end(); ++it) { + const RowTotal* rowtotal = &it->second; + string key = GetKey(sorttype, rowtotal); + sorted_group.insert(std::pair(key, rowtotal)); +//fprintf(stderr, "SortRows_%d insert [%s]\n", sorttype, key.c_str()); + } + + // Step (2) Rewrite the underlying map into sorted order, by building + // a second map and swapping + int temp_next = 0; + GroupSummary temp; + for (GroupSummarySP::const_iterator it = sorted_group.begin(); it != sorted_group.end(); ++it) { + const RowTotal* rowtotal = it->second; + temp[temp_next] = *rowtotal; + ++temp_next; + } + groupsummary->swap(temp); +} + +void SortRows2(int sorttype, GroupSummary2* groupsummary) { + // Step (1) Build side multimap by sort keys + GroupSummarySP sorted_group; + for (GroupSummary2::const_iterator it = groupsummary->begin(); it != groupsummary->end(); ++it) { + const RowTotal* rowtotal = &it->second; + string key = 
GetKey(sorttype, rowtotal); + sorted_group.insert(std::pair(key, rowtotal)); +//fprintf(stderr, "SortRows2_%d insert [%s]\n", sorttype, key.c_str()); + } + + // Step (2) Rewrite the underlying map into sorted order, by building + // a second map and swapping + string temp_next = string("000000"); + GroupSummary2 temp; + for (GroupSummarySP::const_iterator it = sorted_group.begin(); it != sorted_group.end(); ++it) { + const RowTotal* rowtotal = it->second; + temp[temp_next] = *rowtotal; + IncrString(&temp_next); + } + groupsummary->swap(temp); +} + +// Within each group, sort using various orderings. The downstream JSON will +// display items in the order encountered, wihtno further sorting. +void SortAllRows(Summary* summ) { + //Level 1: Summaries across individual cpu#, pid#, rpc# + SortRows(SortByCpuNumber, &summ->cpuprof); + SortRows(SortByBasenameDotElapsed, &summ->pidprof); + SortRows(SortByBasenameDotElapsed, &summ->rpcprof); + // Level 2: Summaries across level 1 rows with like names within group + SortRows2(SortByCpuNumber, &summ->cpuprof2); + SortRows2(SortByBasenameUnderscoreElapsed, &summ->pidprof2); + SortRows2(SortByBasenameUnderscoreElapsed, &summ->rpcprof2); +} + + +void WriteOneRowJson(FILE* f, int type, const RowTotal& rowtotal, int new_rownum) { +//fprintf(stderr, "WriteOneRowJson [%d] %s size=%d\n", rowtotal.rownum, rowtotal.row_name.c_str(), (int)//(rowtotal.rowsummary.size())); + // Ignore merged rows that are redundant, marked by rowcount == zero + if (rowtotal.rowcount == 0) {return;} + + int rownum = rowtotal.rownum; + for (RowSummary::const_iterator it = rowtotal.rowsummary.begin(); + it != rowtotal.rowsummary.end(); + ++it) { + const EventTotal* eventtotal = &it->second; + double ts_sec = eventtotal->start_ts; + double dur_sec = eventtotal->duration; + int ipc = 0; + if (0.0 < eventtotal->duration) { + ipc = eventtotal->ipcsum / eventtotal->duration; + ipc = kLinearToIpc[ipc]; // Map back to granular + } + switch (type) { + case SUMM_CPU: 
+ // ts dur cpu pid rpc event arg ret ipc name + fprintf(f, "[%12.8lf, %10.8lf, %d, %d, %d, %d, %d, %d, %d, \"%s\"],\n", + ts_sec, dur_sec, new_rownum, -1, -1, eventtotal->eventnum, + eventtotal->arg, 0, ipc, eventtotal->event_name.c_str()); + break; + case SUMM_PID: + fprintf(f, "[%12.8lf, %10.8lf, %d, %d, %d, %d, %d, %d, %d, \"%s\"],\n", + ts_sec, dur_sec, -1, new_rownum, -1, eventtotal->eventnum, + eventtotal->arg, 0, ipc, eventtotal->event_name.c_str()); + break; + case SUMM_RPC: + fprintf(f, "[%12.8lf, %10.8lf, %d, %d, %d, %d, %d, %d, %d, \"%s\"],\n", + ts_sec, dur_sec, -1, -1, new_rownum, eventtotal->eventnum, + eventtotal->arg, 0, ipc, eventtotal->event_name.c_str()); + break; + } + ++output_events; + } +} + +int WritePerRowJson(FILE* f, int type, const GroupSummary& groupsummary, int new_rownum) { + for (GroupSummary::const_iterator it = groupsummary.begin(); it != groupsummary.end(); ++it) { + const RowTotal& rowtotal = it->second; + WriteOneRowJson(f, type, rowtotal, new_rownum); + ++new_rownum; + } + return new_rownum; +} + +int WritePerRowJson2(FILE* f, int type, const GroupSummary2& groupsummary, int new_rownum) { + for (GroupSummary2::const_iterator it = groupsummary.begin(); it != groupsummary.end(); ++it) { + const RowTotal& rowtotal = it->second; + // If rowcount is zero, ignore it + if (0 < rowtotal.rowcount) { + WriteOneRowJson(f, type, rowtotal, new_rownum); + ++new_rownum; + } + } + return new_rownum; +} + +void WriteSummaryJsonRow(FILE* f, const Summary& summ) { +//fprintf(stderr, "WriteSummaryJsonRow\n"); + int new_rownum; + //fprintf(stdout, "\"events\" : [\n"); + new_rownum = 0x10000; + new_rownum = WritePerRowJson(f, SUMM_CPU, summ.cpuprof, new_rownum); + new_rownum = WritePerRowJson(f, SUMM_PID, summ.pidprof, new_rownum); + new_rownum = WritePerRowJson(f, SUMM_RPC, summ.rpcprof, new_rownum); + fprintf(f, "[999.0, 0.0, 0, 0, 0, 0, 0, 0, 0, \"\"]\n"); // no comma + fprintf(stdout, "]}\n"); +} + +void WriteSummaryJsonGroup(FILE* f, const 
Summary& summ) { +//fprintf(stderr, "WriteSummaryJsonGroup\n"); + int new_rownum; + //fprintf(stdout, "\"events\" : [\n"); + new_rownum = 0x20000; + new_rownum = WritePerRowJson2(f, SUMM_CPU, summ.cpuprof2, new_rownum); + new_rownum = WritePerRowJson2(f, SUMM_PID, summ.pidprof2, new_rownum); + new_rownum = WritePerRowJson2(f, SUMM_RPC, summ.rpcprof2, new_rownum); + fprintf(f, "[999.0, 0.0, 0, 0, 0, 0, 0, 0, 0, \"\"]\n"); // no comma + fprintf(stdout, "]}\n"); +} + + +// Accumulate time for an item in rowsummary[name] +// Keys are event names rather than event numbers +void AddItemInRow(int rownum, int eventnum, const OneSpan& item, RowSummary* rowsummary) { + if (eventnum < 0) {return;} + + if (rowsummary->find(item.name) == rowsummary->end()) { + // Add new event and name it + EventTotal temp; + temp.start_ts = 0.0; + temp.duration = 0.0; + temp.ipcsum = 0.0; + temp.eventnum = eventnum; + temp.arg = item.arg; + temp.event_name.clear(); + temp.event_name = item.name; + (*rowsummary)[item.name] = temp; +//fprintf(stdout, " new event [%d,%d] %s\n", rownum, eventnum, item.name.c_str()); + } + + // The real action; aggregate (sum durations) by item name + EventTotal* es = &(*rowsummary)[item.name]; + es->duration += item.duration; + es->ipcsum += (item.duration * kIpcToLinear[item.ipc]); +} + +// Add an item to groupsummary[rownum] +// Rownum is cpu number, PID, or RPCid +void AddItem(const char* label, int rownum, int eventnum, const OneSpan& item, GroupSummary* groupsummary) { +//fprintf(stderr, "AddItem[%d,%d] %s\n", rownum, eventnum, item.name.c_str()); + if (rownum < 0) {return;} + + if (groupsummary->find(rownum) == groupsummary->end()) { + // Add new row and name it + // The very first item for this row might not have a proper name for the row; + // we may add a better name later + RowTotal temp; + temp.lo_ts = 999.999999; + temp.hi_ts = 0.0; + temp.rownum = rownum; + temp.rowcount = 1; + temp.proper_row_name = false; + temp.row_name.clear(); + temp.row_name = 
item.name; +//CheckRowname("b", temp.row_name); + temp.rowsummary.clear(); +//fprintf(stderr, "Additem lo/hi_ts[%s] = %12.8f %12.8f\n", +//temp.row_name.c_str(), temp.lo_ts, temp.hi_ts); + + (*groupsummary)[rownum]= temp; +if (verbose) fprintf(stdout, "%s new row [%d] = %s\n", label, rownum, item.name.c_str()); +//DumpSpan(stdout, "item:", &item); + } + + RowTotal* rs = &(*groupsummary)[rownum]; + if (IncreasesCPUnum(eventnum)) { + rs->lo_ts = dmin(rs->lo_ts, item.start_ts); + rs->hi_ts = dmax(rs->hi_ts, item.start_ts + item.duration); +//fprintf(stderr, "Additem %d lo/hi_ts[%s] = %12.8f %12.8f\n", +//eventnum, rs->row_name.c_str(), rs->lo_ts, rs->hi_ts); + } + AddItemInRow(rownum, eventnum, item, &rs->rowsummary); +} + +// Add a proper name for groupsummary[rownum] +void JustRowname(const char* label, int rownum, int eventnum, const OneSpan& item, GroupSummary* groupsummary) { + if (rownum < 0) {return;} + // if ((item.name == "-idle-") && (rownum != 0)) {return;} + + if (groupsummary->find(rownum) == groupsummary->end()) { + // Add new row and name it + RowTotal temp; + temp.lo_ts = item.start_ts; + temp.hi_ts = item.start_ts; + + temp.rownum = rownum; + temp.rowcount = 1; + temp.proper_row_name = true; + temp.row_name.clear(); + temp.row_name = item.name; +//CheckRowname("c", temp.row_name); + temp.rowsummary.clear(); +//fprintf(stderr, "Just lo/hi_ts[%s] = %12.8f %12.8f\n", +//temp.row_name.c_str(), temp.lo_ts, temp.hi_ts); + + (*groupsummary)[rownum] = temp; +if (verbose) fprintf(stdout, "%s JustRowname[%d] = %s\n", label, rownum, item.name.c_str()); + } else if ((*groupsummary)[rownum].proper_row_name == false) { + (*groupsummary)[rownum].proper_row_name = true; + (*groupsummary)[rownum].row_name = item.name; +//CheckRowname("d", item.name); + +if (verbose) fprintf(stdout, "%s JustRowname [%d] = %s\n", label, rownum, item.name.c_str()); + } +} + +// BUG: This previously overwrote main user execution if exact duplicate name +void InsertOneRowMarkers(RowTotal* 
rowtotal) { + // Marker at front of first item in row, giving row label + EventTotal left_marker; + left_marker.start_ts = 0.0; + left_marker.duration = 0.0; + left_marker.ipcsum = 0.0; + left_marker.eventnum = KUTRACE_LEFTMARK; + left_marker.arg = 0; + // Space character makes unique, avoiding overwrite + left_marker.event_name = rowtotal->row_name + " "; +//if (!CheckRowname("f", rowtotal->row_name)) {DumpOneRow(stderr, *rowtotal);} + + (rowtotal->rowsummary)[left_marker.event_name] = left_marker; +} + +void InsertPerRowMarkers(GroupSummary* groupsummary) { + for (GroupSummary::iterator it = groupsummary->begin(); it != groupsummary->end(); ++it) { + RowTotal* rowtotal = &it->second; + InsertOneRowMarkers(rowtotal); + } +} + +void InsertPerRowMarkers2(GroupSummary2* groupsummary) { + for (GroupSummary2::iterator it = groupsummary->begin(); it != groupsummary->end(); ++it) { + RowTotal* rowtotal = &it->second; + InsertOneRowMarkers(rowtotal); + } +} + +void InsertRowMarkers(Summary* summ) { +//fprintf(stderr, "InsertRowMarkers\n"); + InsertPerRowMarkers(&summ->cpuprof); + InsertPerRowMarkers(&summ->pidprof); + InsertPerRowMarkers(&summ->rpcprof); + InsertPerRowMarkers2(&summ->cpuprof2); + InsertPerRowMarkers2(&summ->pidprof2); + InsertPerRowMarkers2(&summ->rpcprof2); +} + +////inline int PackIpc(int num, int ipc) {return (num << 4) | ipc;} + +// Rowname for a CPU is the cpu number in Ascii, added later +// Rowname for a PID is the firt user-mode execution span name +// Rowname for an RPC is the first rpcreq/resp span name +// The row name span may well occur after the the first mention of that row, +// so we have the just-rowname logic +// +// For each item, accumulate it in per-CPU, per-PID, and per-RPC summaries +// +void SummarizeItem(const OneSpan& item, Summary* summary) { + // Accumulate time in each group + if (IsCpuContrib(item)) { + AddItem("ce", item.cpu, item.eventnum, item, &summary->cpuprof); + } + + if (IsPidContrib(item)) { + AddItem("pe", item.pid, 
// Read next line into buffer (at most maxsize-1 characters), stripping any
// trailing crlf, cr, or lf. Return false if no more input.
// A line longer than the buffer comes back in pieces on successive calls.
bool ReadLine(FILE* f, char* buffer, int maxsize) {
  if (fgets(buffer, maxsize, f) == NULL) {return false;}
  size_t len = strlen(buffer);
  // Check len before each look at buffer[len - 1]: an empty line is just
  // "\n", which leaves len == 0 after the first strip.
  if ((0 < len) && (buffer[len - 1] == '\n')) {buffer[--len] = '\0';}
  if ((0 < len) && (buffer[len - 1] == '\r')) {buffer[--len] = '\0';}
  return true;
}
+string StripQuotes(const char* s) { + bool instring = false; + string retval; + const int len = strlen(s); // Measure once, not on every loop test + for (int i = 0; i < len; ++i) { + char c = s[i]; + if (c =='"') {instring = !instring; continue;} + if (instring) {retval.append(1, c);} + } + return retval; +} + +// Input is a json file of spans +// start time and duration for each span are in seconds +// Output is a smaller json file of fewer spans with lower-resolution times +void Usage() { + fprintf(stderr, "Usage: spantoprof [-row | -group] [-all] [-v] \n"); + exit(0); +} + +// +// Filter from stdin to stdout +// +int main (int argc, const char** argv) { + if (argc < 0) {Usage();} + + for (int i = 1; i < argc; ++i) { + if (strcmp(argv[i], "-row") == 0) {dorow = true; dogroup = false;} + else if (strcmp(argv[i], "-group") == 0) {dogroup = true; dorow = false;} + else if (strcmp(argv[i], "-all") == 0) {doall = true;} + else if (strcmp(argv[i], "-v") == 0) {verbose = true;} + else Usage(); + } + + // expecting: + // ts dur cpu pid rpc event arg ret ipc name--------------------> + // [ 22.39359781, 0.00000283, 0, 1910, 0, 67446, 0, 256, 1, "gnome-terminal-.1910"], + + char buffer[kMaxBufferSize]; + bool needs_presorted = true; + bool do_copy = true; + while (ReadLine(stdin, buffer, kMaxBufferSize)) { + char buffer2[256]; + buffer2[0] = '\0'; + OneSpan onespan; + char tempname[64]; + tempname[0] = '\0'; + // %63s bounds the name to tempname[64]; a bare %s could overrun it + int n = sscanf(buffer, "[%lf, %lf, %d, %d, %d, %d, %d, %d, %d, %63s", + &onespan.start_ts, &onespan.duration, + &onespan.cpu, &onespan.pid, &onespan.rpcid, + &onespan.eventnum, &onespan.arg, &onespan.retval, &onespan.ipc, tempname); + + // If not a span, copy and go on to the next input line + // This does all the leading JSON up to and including "events" : [ + if (do_copy && (n < 10)) { + // Insert "presorted" JSON line in alphabetical order. 
+ if (needs_presorted && (memcmp(buffer, kPresorted, 12) > 0)) { + fprintf(stdout, "%s : 1,\n", kPresorted); + needs_presorted = false; + } + fprintf(stdout, "%s\n", buffer); + continue; + } + + // We got past the initial JSON. Do not copy any more input lines + do_copy = false; + +if (verbose) {fprintf(stdout, "==%s\n", buffer);} + + onespan.name = StripQuotes(tempname); + // Fixup freq to give unique names (moved back to rawtoevent now) + if (IsAFreq(onespan) && (strchr(tempname, '_') == NULL)) { + onespan.name = onespan.name + "_" + IntToString(onespan.arg); + } + // Fixup lock try to give unique names + if (IsALockTry(onespan)) { + onespan.name[0] = '~'; // Distinguish try ~ from held = + } + SummarizeItem(onespan, &summary); // Build aggregates as we go + } + + // All the input is read + if (verbose) { + fprintf(stderr, "Begin DumpSummary\n"); + DumpSummary(stderr, summary); + DumpSummary2(stderr, summary); + fprintf(stderr, "End DumpSummary\n"); + } + + RewriteRowNames(&summary); // Must precede AggregateRows + //DumpSummary(stderr, summary); + //DumpSummary2(stderr, summary); + + MergeRows(&summary); // Must precede InsertRowMarkers, WriteStartTimes + // lo_ts and hi_ts are not filled in yet + if (verbose) { + DumpSummary(stderr, summary); + DumpSummary2(stderr, summary); + } + + InsertRowMarkers(&summary); + //DumpSummary(stderr, summary); + //DumpSummary2(stderr, summary); + + RewriteStartTimes(&summary); // Must precede WriteSummaryJson + // Fills in lo_ts and hi_ts + //DumpSummary(stderr, summary); + //DumpSummary2(stderr, summary); + + SortAllRows(&summary); + + PruneGroups(&summary); + //DumpSummary2(stderr, summary); + + if (dorow) { + WriteSummaryJsonRow(stdout, summary); + } + if (dogroup) { + WriteSummaryJsonGroup(stdout, summary); + } + + fprintf(stderr, "spantoprof: %d events\n", output_events); + + return 0; +} diff --git a/book-user-code/spantospan.cc b/book-user-code/spantospan.cc new file mode 100644 index 000000000000..2abee9e5c971 --- 
/dev/null +++ b/book-user-code/spantospan.cc @@ -0,0 +1,233 @@ +// Little program to turn per-CPU timespans into fewer larger-granularity spans +// Copyright 2021 Richard L. Sites +// +// Filter from stdin to stdout +// One command-line parameter -- +// granularity in microseconds. zero means 1:1 passthrough +// +// dick sites 2016.11.07 +// dick sites 2017.08.16 +// Updated to json format in/out text +// dick sites 2017.11.18 +// add optional instructions per cycle IPC support +// + +#include +#include + +#include +#include // exit +#include +#include "basetypes.h" + +#define UserPidNum 0x200 + +using std::string; +using std::map; + +typedef struct { + double start_ts; // Seconds + double duration; // Seconds + int64 start_ts_ns; + int64 duration_ns; + int cpu; + int pid; + int rpcid; + int event; + int arg; + int retval; + int ipc; + char name[64]; +} OneSpan; + +typedef map SpanMap; // Accumulated duration for each event + +typedef struct { + int64 next_ts_ns; + int64 total_excess_ns; + SpanMap spanmap; +} CPUstate; + + +int output_events = 0; + +// Accumulate a span, incrementing the excess not-yet-output times +void AddSpan(const OneSpan& onespan, CPUstate* cpustate) { + int event = onespan.event; + SpanMap::iterator it = cpustate->spanmap.find(event); + if (it == cpustate->spanmap.end()) { + // Make a new entry + OneSpan temp; + temp = onespan; + temp.duration_ns = 0; // Updated below + cpustate->spanmap[event] = temp; + it = cpustate->spanmap.find(event); + } + OneSpan* addspan = &it->second; + addspan->duration_ns += onespan.duration_ns; // Everything else ignored + cpustate->total_excess_ns += onespan.duration_ns; +} + +// Return the entry with the largest positive accumulated duration_ns, +// or NULL if no entry has a positive duration +OneSpan* FindLargestExcess(SpanMap& spanmap) { + // int64 to match duration_ns; an int here truncated excesses over ~2.1 sec + int64 max_excess = 0; + OneSpan* retval = NULL; + for (SpanMap::iterator it = spanmap.begin(); it != spanmap.end(); ++it) { + if (max_excess < it->second.duration_ns) { + max_excess = it->second.duration_ns; + retval = &it->second; + } + } + return retval; +} + +// Round toward zero +inline 
int64 RoundDown(int64 a, int64 b) { + return (a / b) * b; +} + +// Round up or down +inline int64 Round(int64 a, int64 b) { + return ((a + (b/2)) / b) * b; +} + +// Each call will update the current duration for this CPU and emit it +void ProcessSpan(int64 output_granularity_ns, + const OneSpan& onespan, CPUstate* cpustate) { + int cpu = onespan.cpu; + if (cpustate[cpu].next_ts_ns < 0) { + cpustate[cpu].next_ts_ns = RoundDown(onespan.start_ts_ns, output_granularity_ns); + } + AddSpan(onespan, &cpustate[cpu]); + while (cpustate[cpu].total_excess_ns > output_granularity_ns) { + OneSpan* subspan = FindLargestExcess(cpustate[cpu].spanmap); + if (subspan == NULL) {break;} + // Output this span, setting start time and duration to + // multiples of output_granularity_ns. + + // If this rounds up, residual duration is negative + int64 duration_ns = Round(subspan->duration_ns, output_granularity_ns); + if (duration_ns <= 0) {break;} + // Name has trailing punctuation, including ], + fprintf(stdout, "[%12.8f, %10.8f, %d, %d, %d, %d, %d, %d, %d, %s\n", + cpustate[cpu].next_ts_ns / 1000000000.0, duration_ns / 1000000000.0, + subspan->cpu, subspan->pid, subspan->rpcid, subspan->event, + subspan->arg, subspan->retval, subspan->ipc, subspan->name); + ++output_events; + subspan->duration_ns -= duration_ns; + cpustate[cpu].next_ts_ns += duration_ns; + cpustate[cpu].total_excess_ns -= duration_ns; + } +} + +// Add dummy entry that sorts last, then close the events array and top-level json +// Version 3 with IPC +void FinalJson(FILE* f) { + fprintf(f, "[999.0, 0.0, 0, 0, 0, 0, 0, 0, 0, \"\"]\n"); // no comma + fprintf(f, "]}\n"); +} + + +static const int kMaxBufferSize = 256; + +// Read next line, stripping any crlf. Return false if no more. 
+bool ReadLine(FILE* f, char* buffer, int maxsize) { + char* s = fgets(buffer, maxsize, f); + if (s == NULL) {return false;} + int len = strlen(s); + // Strip any crlf or cr or lf + if (s[len - 1] == '\n') {s[--len] = '\0';} + if (s[len - 1] == '\r') {s[--len] = '\0';} + return true; +} + +// Input is a json file of spans +// start time and duration for each span are in seconds +// Output is a smaller json file of fewer spans with lower-resolution times +void Usage() { + fprintf(stderr, "Usage: spantospan resolution_usec [start_sec [stop_sec]]\n"); + exit(0); +} + +// +// Filter from stdin to stdout +// +int main (int argc, const char** argv) { + CPUstate cpustate[16]; + // Internally, we keep everything as integer nanoseconds to avoid roundoff + // error and to give clean truncation + int64 output_granularity_ns = 1; + + if (argc < 2) {Usage();} + output_granularity_ns = 1000 * atoi(argv[1]); + + // Initialize to half-full + for (int i = 0; i < 16; ++i) { + cpustate[i].next_ts_ns = -1; + cpustate[i].total_excess_ns = output_granularity_ns / 2; + cpustate[i].spanmap.clear(); + } + + // expecting: + // ts dur cpu pid rpc event arg retval ipc name + // [ 22.39359781, 0.00000283, 0, 1910, 0, 67446, 0, 256, 3, "gnome-terminal-.1910"], + + char buffer[kMaxBufferSize]; + while (ReadLine(stdin, buffer, kMaxBufferSize)) { + // zero granularity means 1:1 passthrough + if (output_granularity_ns == 0) { + fprintf(stdout, "%s\n", buffer); + if (buffer[0] == '[') {++output_events;} + continue; + } + + char buffer2[256]; + buffer2[0] = '\0'; + OneSpan onespan; + int n = sscanf(buffer, "[%lf, %lf, %d, %d, %d, %d, %d, %d, %d, %s", + &onespan.start_ts, &onespan.duration, + &onespan.cpu, &onespan.pid, &onespan.rpcid, + &onespan.event, &onespan.arg, &onespan.retval, &onespan.ipc, onespan.name); + // fprintf(stderr, "%d: %s\n", n, buffer); + + if (n < 9) { + // Copy unchanged anything not a span + fprintf(stdout, "%s\n", buffer); + continue; + } + + if (onespan.cpu < 0) {continue;} + 
+ if (onespan.start_ts >= 999.0) {break;} // Always strip 999.0 end marker and stop + + if (16 <= onespan.cpu){ + fprintf(stderr, "Bad CPU number at '%s'\n", buffer); + exit(0); + } + + // If the input span is a major marker (i.e. Mark_a _b or _c) keep it now + // And chsange no other state + if ((0x020A <= onespan.event) && (onespan.event <= 0x020C)) { + // Name has trailing punctuation, including ], + fprintf(stdout, "%s\n", buffer); + ++output_events; + continue; + } + + // Make all times nsec + onespan.start_ts_ns = onespan.start_ts * 1000000000.0; + onespan.duration_ns = onespan.duration * 1000000000.0; + + // Event is already composite + ProcessSpan(output_granularity_ns, onespan, &cpustate[0]); + } + + // Add marker and closing at the end + // zero granularity means 1:1 passthrough + if (output_granularity_ns != 0) { + FinalJson(stdout); + } + + fprintf(stderr, "spantospan: %d events\n", output_events); + + return 0; +} diff --git a/book-user-code/spantotrim.cc b/book-user-code/spantotrim.cc new file mode 100644 index 000000000000..8cffe1eace1b --- /dev/null +++ b/book-user-code/spantotrim.cc @@ -0,0 +1,170 @@ +// Little program to filter time range in per-CPU timespans +// Copyright 2021 Richard L. 
Sites +// +// Filter from stdin to stdout +// One or two command-line parameters -- +// stat_second [stop_second] +// +// dick sites 2016.11.07 +// dick sites 2017.08.16 +// Cloned from json format spantospan +// dick sites 2017.09.01 +// Add trim by mark_abc label +// dick sites 2017.11.18 +// add optional instructions per cycle IPC support +// +// +// Compile with g++ -O2 spantotrim.cc from_base40.cc -o spantotrim +// + +#include +#include + +#include +#include // exit +#include +#include "basetypes.h" +#include "from_base40.h" + +using std::string; +using std::map; + +typedef struct { + double start_ts; // Seconds + double duration; // Seconds + int64 start_ts_ns; + int64 duration_ns; + int cpu; + int pid; + int rpcid; + int event; + int arg; + int retval; + int ipc; + char name[64]; +} OneSpan; + +static int incoming_version = 0; // Incoming version number, if any, from ## VERSION: 2 +static int incoming_flags = 0; // Incoming flags, if any, from ## FLAGS: 128 + +// Add dummy entry that sorts last, then close the events array and top-level json +void FinalJson(FILE* f) { + fprintf(f, "[999.0, 0.0, 0, 0, 0, 0, 0, 0, 0, \"\"]\n"); // no comma + fprintf(f, "]}\n"); +} + +// Return true if the event is mark_a mark_b mark_c +inline bool is_mark_abc(uint64 event) {return (event == 0x020A) || (event == 0x020B) || (event == 0x020C);} + +static const int kMaxBufferSize = 256; + +// Read next line, stripping any crlf. Return false if no more. 
+bool ReadLine(FILE* f, char* buffer, int maxsize) { + char* s = fgets(buffer, maxsize, f); + if (s == NULL) {return false;} + int len = strlen(s); + // Strip any crlf or cr or lf. The len > 0 tests keep a blank line ("\n") + // or an empty string from indexing s[-1] + if ((len > 0) && (s[len - 1] == '\n')) {s[--len] = '\0';} + if ((len > 0) && (s[len - 1] == '\r')) {s[--len] = '\0';} + return true; +} + +// Input is a json file of spans +// start time and duration for each span are in seconds +// Output is a smaller json file of fewer spans with lower-resolution times +void Usage() { + fprintf(stderr, "Usage: spantotrim label | start_sec [stop_sec]\n"); + exit(0); +} + +// +// Filter from stdin to stdout +// +int main (int argc, const char** argv) { + double start_sec = 0.0; + double stop_sec = 999.0; + char label[8]; + char notlabel[8]; + // Default: label filter is a nop + bool inside_label_span = true; + bool next_inside_label_span = true; + + if (argc < 2) {Usage();} + + if ('9' < argv[1][0]) { + // Does not start with a digit. Assume it is a label and + // that we should filter + // Mark_abc label .. 
Mark_abc /label + // inclusive + int len = strlen(argv[1]); + if (len > 6 ) {len = 6;} + // Always NUL-terminate: label is len chars, notlabel is '/' plus label. + // Previously a truncated label and a short notlabel were left unterminated + memcpy(label, argv[1], len); + label[len] = '\0'; + notlabel[0] = '/'; + memcpy(notlabel + 1, label, len + 1); + inside_label_span = false; + next_inside_label_span = false; + } + + // label and notlabel are only filled in when filtering by label, so only + // compare against them in that mode + const bool filter_by_label = !inside_label_span; + + if (inside_label_span && (argc >= 2)) { + int n = sscanf(argv[1], "%lf", &start_sec); + if (n != 1) {Usage();} + } + if (inside_label_span && (argc >= 3)) { + int n = sscanf(argv[2], "%lf", &stop_sec); + if (n != 1) {Usage();} + } + + // expecting: + // ts dur cpu pid rpc event arg ret ipc name--------------------> + // [ 22.39359781, 0.00000283, 0, 1910, 0, 67446, 0, 256, 1, "gnome-terminal-.1910"], + + int output_events = 0; + char buffer[kMaxBufferSize]; + while (ReadLine(stdin, buffer, kMaxBufferSize)) { + char buffer2[256]; + buffer2[0] = '\0'; + OneSpan onespan; + onespan.name[0] = '\0'; // Stays empty if the line has no name field (n == 9) + // %63s bounds the name to onespan.name[64]; a bare %s could overrun it + int n = sscanf(buffer, "[%lf, %lf, %d, %d, %d, %d, %d, %d, %d, %63s", + &onespan.start_ts, &onespan.duration, + &onespan.cpu, &onespan.pid, &onespan.rpcid, + &onespan.event, &onespan.arg, &onespan.retval, &onespan.ipc, onespan.name); + // fprintf(stderr, "%d: %s\n", n, buffer); + + if (n < 9) { + // Copy unchanged anything not a span + fprintf(stdout, "%s\n", buffer); + continue; + } + if (onespan.start_ts >= 999.0) {break;} // Always strip 999.0 end marker and stop + if (onespan.start_ts < start_sec) {continue;} + if (onespan.start_ts >= stop_sec) {continue;} + + // Keep an eye out for mark_abc + if (filter_by_label && is_mark_abc(onespan.event)) { + char temp[8]; + Base40ToChar(onespan.arg, temp); + // Turn on keeping events if we find a matching label + if (strcmp(label, temp) == 0) {inside_label_span = true;} + // Defer turning off keeping events so we keep this one + next_inside_label_span = inside_label_span; + if (strcmp(notlabel, temp) == 0) {next_inside_label_span = false;} + } + if (!inside_label_span) {continue;} + + // Name has trailing punctuation, including ], + fprintf(stdout, "[%12.8f, %10.8f, %d, %d, %d, %d, %d, %d, %d, %s\n", + 
onespan.start_ts, onespan.duration, + onespan.cpu, onespan.pid, onespan.rpcid, onespan.event, + onespan.arg, onespan.retval, onespan.ipc, onespan.name); + ++output_events; + + inside_label_span = next_inside_label_span; + } + + // Add marker and closing at the end + FinalJson(stdout); + fprintf(stderr, "spantotrim: %d events\n", output_events); + + return 0; +} diff --git a/book-user-code/spinlock.cc b/book-user-code/spinlock.cc new file mode 100644 index 000000000000..453f52015c4b --- /dev/null +++ b/book-user-code/spinlock.cc @@ -0,0 +1,89 @@ +// Routines to deal with simple spinlocks +// Copyright 2021 Richard L. Sites +// Quite possibly flawed + +#include "basetypes.h" +#include "dclab_log.h" // for GetUsec() +#include "spinlock.h" +#include "timecounters.h" + +// Global variable. This is constant after startup, so no threading issues +static int kCyclesPerUsec; + +// Read the cycle counter and gettimeofday() close together, returning both +void GetTimePair(int64* usec, int64* cycles) { + uint64 startcy, stopcy; + int64 gtodusec, elapsedcy; + // Do more than once if we get an interrupt or other big delay in the middle of the loop + do { + startcy = GetCycles(); + gtodusec = GetUsec(); + stopcy = GetCycles(); + elapsedcy = stopcy - startcy; + // In a quick test on an Intel i3 chip, GetUsec() took about 150 cycles + // printf("%ld elapsed cycles\n", elapsedcy); + } while (elapsedcy > 10000); // About 4 usec at 2.5GHz + *usec = gtodusec; + *cycles = startcy; +} + +// Loop for 100 ms picking out time of day and cycle counter +// Return measured cycles per usec (expected to be 1000..4000) +int CalibrateCycleCounter() { + int64 base_usec, base_cycles; + int64 usec, cycles; + int64 delta_usec, delta_cycles; + GetTimePair(&base_usec,&base_cycles); + do { + GetTimePair(&usec,&cycles); + delta_usec = usec - base_usec; + delta_cycles = cycles - base_cycles; + } while (delta_usec < 100000); + + kCyclesPerUsec = delta_cycles / delta_usec; + return kCyclesPerUsec; +} + +// 
Acquire a spinlock, including a memory barrier to prevent hoisting loads +// Returns number of usec spent spinning (0 if the cycle counter was never calibrated) +int32 AcquireSpinlock(volatile char* lock) { + int32 safety_count = 0; + uint64 startcy = GetCycles(); + char old_value; + do { + while (*lock != 0) { // Spin without writing while someone else holds the lock + ++safety_count; + if (safety_count > 100000000) { + fprintf(stderr, "safety_count exceeded. Grabbing lock\n"); + *lock = 0; + } + } + // Try to get the lock + old_value = __atomic_test_and_set(lock, __ATOMIC_ACQUIRE); + } while (old_value != 0); + // WE got the lock + uint64 stopcy = GetCycles(); + int64 elapsed = stopcy - startcy; + // kCyclesPerUsec is zero until CalibrateCycleCounter() runs; do not divide by it then + int32 usec = (kCyclesPerUsec > 0) ? (elapsed / kCyclesPerUsec) : 0; + return usec; +} + +// Release a spinlock, including a memory barrier to prevent sinking stores +void ReleaseSpinlock(volatile char* lock) { + __atomic_clear(lock, __ATOMIC_RELEASE); +} + + +// The constructor acquires the spinlock and the destructor releases it. +// Thus, just declaring one of these in a block makes the block run *only* when +// holding the lock and then reliably release it at block exit +SpinLock::SpinLock(LockAndHist* lockandhist) { + lockandhist_ = lockandhist; + int32 usec = AcquireSpinlock(&lockandhist_->lock); + ++lockandhist_->hist[FloorLg(usec)]; +} + +SpinLock::~SpinLock() { + ReleaseSpinlock(&lockandhist_->lock); +} + diff --git a/book-user-code/spinlock.h b/book-user-code/spinlock.h new file mode 100644 index 000000000000..e2377485a85b --- /dev/null +++ b/book-user-code/spinlock.h @@ -0,0 +1,45 @@ +// Routines to deal with simple spinlocks, using Gnu C intrinsics +// Copyright 2021 Richard L. Sites + +#ifndef __SPINLOCK_H__ +#define __SPINLOCK_H__ + +#include "basetypes.h" + +typedef struct { + volatile char lock; // One-byte spinlock + char pad[7]; // align the histogram + uint32 hist[32]; // histogram of spin time, in buckets of floor(lg(usec)) +} LockAndHist; + +// The constructor for this acquires the spinlock and the destructor releases it. 
+// Thus, just declaring one of these in a block makes the block run *only* when +// holding the lock and then reliably release it at block exit +class SpinLock { +public: + SpinLock(LockAndHist* lockandhist); + ~SpinLock(); + + LockAndHist* lockandhist_; +}; + +// Return floor log 2 of x, i.e. the number of bits needed to hold x +int32 FloorLg(int32 x); + +// Read the cycle counter and gettimeofday() close together, returning both +void GetTimePair(int64* usec, int64* cycles); + +// Loop for 100 ms picking out time of day and cycle counter +// Return measured cycles per usec (expected to be 1000..4000) +// Sets an internal global variable for AcquireSpinlock +int CalibrateCycleCounter(); + +// Acquire a spinlock, including a memory barrier to prevent hoisting loads +// Returns number of usec spent spinning +int32 AcquireSpinlock(volatile char* lock); + +// Release a spinlock, including a memory barrier to prevent sinking stores +void ReleaseSpinlock(volatile char* lock); + +#endif // __SPINLOCK_H__ + diff --git a/book-user-code/spinlock_fixed.cc b/book-user-code/spinlock_fixed.cc new file mode 100644 index 000000000000..d0dfce7b6ab6 --- /dev/null +++ b/book-user-code/spinlock_fixed.cc @@ -0,0 +1,107 @@ +// Routines to deal with simple spinlocks +// Copyright 2021 Richard L. Sites + +#include "basetypes.h" +#include "dclab_log.h" // for GetUsec() +#include "kutrace_lib.h" +#include "spinlock.h" +#include "timecounters.h" + +// Global variable. 
This is constant after startup, so no threading issues +static int kCyclesPerUsec; + +// Read the cycle counter and gettimeofday() close together, returning both +void GetTimePair(int64* usec, int64* cycles) { + uint64 startcy, stopcy; + int64 gtodusec, elapsedcy; + // Do more than once if we get an interrupt or other big delay in the middle of the loop + do { + startcy = GetCycles(); + gtodusec = GetUsec(); + stopcy = GetCycles(); + elapsedcy = stopcy - startcy; + // In a quick test on an Intel i3 chip, GetUsec() took about 150 cycles + // printf("%ld elapsed cycles\n", elapsedcy); + } while (elapsedcy > 10000); // About 4 usec at 2.5GHz + *usec = gtodusec; + *cycles = startcy; +} + +// Loop for 100 ms picking out time of day and cycle counter +// Return measured cycles per usec (expected to be 1000..4000) +int CalibrateCycleCounter() { + int64 base_usec, base_cycles; + int64 usec, cycles; + int64 delta_usec, delta_cycles; + GetTimePair(&base_usec,&base_cycles); + do { + GetTimePair(&usec,&cycles); + delta_usec = usec - base_usec; + delta_cycles = cycles - base_cycles; + } while (delta_usec < 100000); + + kCyclesPerUsec = delta_cycles / delta_usec; + return kCyclesPerUsec; +} + +// Acquire a spinlock, including a memory barrier to prevent hoisting loads +// Returns number of usec spent spinning +int32 AcquireSpinlock(volatile char* lock) { + int32 safety_count = 0; + bool was_set; + // Try once -- so uncontended case is fast + was_set = __atomic_test_and_set(lock, __ATOMIC_ACQUIRE); + if (!was_set) { + // We got the lock; zero usec spent spinning + kutrace::mark_b("lock0"); + return 0; + } + + uint64 startcy = GetCycles(); + kutrace::mark_c("spin"); + do { + while (*lock != 0) { // Spin without writing while someone else holds the lock + ++safety_count; + // Put in a marker every 16M iterations + if ((safety_count & 0xffffff) == 0) {kutrace::mark_d(safety_count >> 20);} + // Grab the lock anyway after 500M iterations + if (safety_count > 500000000) { + 
fprintf(stderr, "safety_count 500M exceeded. Grabbing lock\n"); + kutrace::mark_c("GRAB"); + *lock = 0; + } + } + // Try to get the lock + kutrace::mark_c("try"); + was_set = __atomic_test_and_set(lock, __ATOMIC_ACQUIRE); + } while (was_set); + kutrace::mark_c("/spin"); + + // We got the lock + uint64 stopcy = GetCycles(); + int64 elapsed = stopcy - startcy; + // kCyclesPerUsec is zero until CalibrateCycleCounter() runs; do not divide by it then + int32 usec = (kCyclesPerUsec > 0) ? (elapsed / kCyclesPerUsec) : 0; + kutrace::mark_b("lock"); + return usec; +} + +// Release a spinlock, including a memory barrier to prevent sinking stores +void ReleaseSpinlock(volatile char* lock) { + __atomic_clear(lock, __ATOMIC_RELEASE); + kutrace::mark_b("/lock"); +} + + +// The constructor acquires the spinlock and the destructor releases it. +// Thus, just declaring one of these in a block makes the block run *only* when +// holding the lock and then reliably release it at block exit +SpinLock::SpinLock(LockAndHist* lockandhist) { + lockandhist_ = lockandhist; + int32 usec = AcquireSpinlock(&lockandhist_->lock); + ++lockandhist_->hist[FloorLg(usec)]; +} + +SpinLock::~SpinLock() { + ReleaseSpinlock(&lockandhist_->lock); +} + diff --git a/book-user-code/tcpalign.cc b/book-user-code/tcpalign.cc new file mode 100644 index 000000000000..cb906b389263 --- /dev/null +++ b/book-user-code/tcpalign.cc @@ -0,0 +1,393 @@ +// Little program to time-align a TCP dump trace with a KUtrace +// Copyright 2021 Richard L. Sites +// +// Inputs: +// foo_pcap.json file derived from a tcpdump trace via pcaptojson +// ku_foo.json file created by eventtospan, containing RPCs +// +// Method: +// The pcap file has thousands of RPC message events with tcpdump timestamps, +// which unfortunately are not from the same time base as user-mode gettimeofday +// used in KUtrace. +// +// So we look for syswrites in the KUtrace that are within an RPC response +// time range, and look for the corresponding message by RPCID in the tcpdump +// trace. 
Near the beginning of the traces and near the end, we calculate the +// time offset that when added to the tcpdump times will place the outbound +// message on the wire about 5 usec after the start of the write(). If there +// is drift between the time bases, the early and late offsets will differ +// somewhat. Responses more closely align between write and wire than requests +// between wire and read. +// +// In addition, the KUtrace file has timestamps starting at a minute boundary +// as reflected by the basetime. The tcpdump file in general may start +// during a different minute. To get nearby, we assume that the traces are +// time aligned within +/- 30 seconds of each other. +// +// RPCIDs are only 16 bits, so there will be duplicates in many traces. We +// match up ones that have the smallest absolute time difference. +// +// The two traces will in general only partially overlap, so we search initially +// for an RPCID near the beginning of either that is also found in the other. +// If they overlap at all (and no data is missing), either the first RPC in the +// KUtrace will be in the tcpdump trace, or the first RPC in the tcpdump trace will +// be in the KUtrace. 
+ +#include +#include +#include +#include +#include + +#include "basetypes.h" +#include "kutrace_lib.h" + +using std::string; + +typedef struct { + double start_ts; // Multiples of 10 nsec + double duration; // Multiples of 10 nsec + int cpu; + int pid; + int rpcid; + int eventnum; + int arg; + int retval; + int ipc; + string name; +} OneSpan; + + +typedef struct { + uint32 rpcid; + double ts; +} RpcTime; + + +static const int kMaxBufferSize = 256; + +static const int write_event = 2049; // Depends on which Linux, see + // kutrace_control_names.h +static const int rx_pkt_event = KUTRACE_RPCIDRXMSG; +static const int tx_pkt_event = KUTRACE_RPCIDTXMSG; + +// We expect the the outbound message to be about 5 usec after the start of the syswrite +static const double write_to_rpc_delay = 0.000005; + +// We expect a good fit to have the new KUtrace - tcpdump difference to be +/- 100 usec +static const double max_fitted_diff = 0.000100; + +typedef struct { + double x; + double y; +} XYPair; + +typedef struct { + double x0; + double y0; + double slope; +} Fit; + +void print_fit(FILE* f, const Fit& fit) { + fprintf(f, "Fit: x0 %10.6f, y0 %10.6f, slope %12.8f\n", fit.x0, fit.y0, fit.slope); +} + +// Calculate a least-squares fit +// Input is an array of k pairs +// Output is x0, y0, and slope such that y = ((x - x0) * slope) + y0 +void get_fit(const XYPair* xypair, int k, Fit* fit) { + // Default to the identity mapping + fit->x0 = 0.0; + fit->y0 = 0.0; + fit->slope = 1.0; + if (k <= 0) {return;} + + double n = 0.0; + double nx = 0.0; + double ny = 0.0; + double nxy = 0.0; + double nxx = 0.0; + + // Avoid precision loss from squaring big numbers by offsetting from first value + double xbase = xypair[0].x; + + // Now calculate the fit + for (int i = 0; i < k; ++i) { + double xi = xypair[i].x - xbase; + double yi = xypair[i].y; + n += 1.0; + nx += xi; + ny += yi; + nxy += xi * yi; + nxx += xi * xi; + } + double num = (n * nxy) - (nx * ny); + double denom = (n * nxx) - (nx * 
nx); + + fit->x0 = xbase; + if (denom != 0.0) { + fit->slope = num / denom; + fit->y0 = ((ny * nxx) - (nx * nxy)) / denom; + } else { + // Degenerate input (all x equal, e.g. a single pair): every offset xi is + // zero, so keep the default slope and use the mean y instead of 0/0 = NaN + fit->slope = 1.0; + fit->y0 = ny / n; + } +} + +inline double remap(double x, const Fit& fit) {return ((x - fit.x0) * fit.slope) + fit.y0;} + + + +// Read next line, stripping any crlf. Return false if no more. +bool ReadLine(FILE* f, char* buffer, int maxsize) { + char* s = fgets(buffer, maxsize, f); + if (s == NULL) {return false;} + int len = strlen(s); + // Strip any crlf or cr or lf. The len > 0 tests keep a blank line ("\n") + // or an empty string from indexing s[-1] + if ((len > 0) && (s[len - 1] == '\n')) {s[--len] = '\0';} + if ((len > 0) && (s[len - 1] == '\r')) {s[--len] = '\0';} + return true; +} + +string strip_suffix(const string& s) { + size_t period = s.find_last_of('.'); + return s.substr(0, period); +} + +// Expecting hh:mm:ss in s +int get_seconds_in_day(const char* s) { + int hr = atoi(&s[0]); + int min = atoi(&s[3]); + int sec = atoi(&s[6]); + return (hr * 3600) + (min * 60) + sec; +} + +void usage() { + fprintf(stderr, "usage: tcpalign \n"); + exit(0); +} + +int main (int argc, const char** argv) { + // Open files or die + + // Read all the RPC events in KUtrace, recording time w/tracebase of first + // instance of each response write RPCID. + + // Read all the RPC events in tcpdump, recording time w/tcpbase of first + // instance of each response RPC. + + // Scan matching pairs to calculate kutrace - tcpdump time difference + // Maybe Throw out big differences + // Do least-squares fit; print offset and slope w.r.t. 
first matching RPC + + // Rewrite the tcpdump file + + if (argc <= 2) {usage();} + const char* kutrace_name = argv[1]; + const char* tcpdump_name = argv[2]; + string out_name = strip_suffix(string(tcpdump_name)) + "_align.json"; + + FILE* ku = fopen(kutrace_name, "r"); + if (ku == NULL) {fprintf(stderr, "%s did not open\n", kutrace_name); exit(0);} + FILE* tcp = fopen(tcpdump_name, "r"); + if (tcp == NULL) {fprintf(stderr, "%s did not open\n", tcpdump_name); exit(0);} + + RpcTime ku_rpc[65536]; + RpcTime tcp_rpc[65536]; + memset(ku_rpc, 0, 65536 * sizeof(RpcTime)); + memset(tcp_rpc, 0, 65536 * sizeof(RpcTime)); + + char buffer[kMaxBufferSize]; + char ku_basetime_str[kMaxBufferSize]; + char tcp_basetime_str[kMaxBufferSize]; + double ku_basetime = 0.0; // Seconds within a day + double tcp_basetime = 0.0; // Seconds within a day + + OneSpan span; + char name_buffer[256]; + + // Expecting either (note leading space) + // 0123456789.123456789.123456789.123456789 + // "tracebase" : "2020-08-28_14:18:00", + // "tcpdumpba" : "2020-08-28_14:18:00", + // KUTRACE_RPCIDRXMSG, event 516: + // [ 0.00003500, 0.00000001, 0, 0, 12267, 516, 0, 0, 0, "rpc.12267"], + // KUTRACE_RPCIDTXMSG, event 517: + // [ 0.00017900, 0.00000001, 0, 0, 39244, 517, 4012, 0, 0, "rpc.39244"], + // syswrite, event 2049, rpcid 52790 or whatever: + // [ 56.25728887, 0.00001500, 1, 11903, 52790, 2049, 4, 4100, 1, "write"], + // ts 1 dur 2 CPU 3 pid 4 rpc 5 event + + // Read the kutrace times + //--------------------------------------------------------------------------// + while (ReadLine(ku, buffer, kMaxBufferSize)) { + if (memcmp(buffer, " \"tracebase\"", 12) == 0) { + memcpy(ku_basetime_str, buffer, kMaxBufferSize); + int temp = get_seconds_in_day(&buffer[27]); +fprintf(stderr, "ku_basetime = %s\n", buffer); +fprintf(stderr, "ku_basetime = %02d:%02d:%02d\n", temp/3600, (temp/60)%60, temp%60); + ku_basetime = temp; + continue; + } + if (buffer[0] != '[') {continue;} + + int n = sscanf(buffer, 
"[%lf,%lf,%d,%d,%d,%d,%d,%d,%d,%s", + &span.start_ts, &span.duration, + &span.cpu, &span.pid, &span.rpcid, + &span.eventnum, &span.arg, &span.retval, &span.ipc, + name_buffer); + if (n != 10) {continue;} + + if (span.eventnum == write_event) { + if (ku_rpc[span.rpcid].ts == 0.0) { // first time only + ku_rpc[span.rpcid].ts = span.start_ts; +fprintf(stdout, "ku_rpc[%d] = %8.6f + %8.6f\n", span.rpcid, span.start_ts, ku_basetime); + } + } + } + fclose(ku); + + // Read the tcpdump times + //--------------------------------------------------------------------------// + while (ReadLine(tcp, buffer, kMaxBufferSize)) { + if (memcmp(buffer, " \"tcpdumpba\"", 12) == 0) { + memcpy(tcp_basetime_str, buffer, kMaxBufferSize); + int temp = get_seconds_in_day(&buffer[27]); +fprintf(stderr, "tcp_basetime = %s\n", buffer); +fprintf(stderr, "tcp_basetime = %02d:%02d:%02d\n", temp/3600, (temp/60)%60, temp%60); + tcp_basetime = temp; + continue; + } + if (buffer[0] != '[') {continue;} + + int n = sscanf(buffer, "[%lf,%lf,%d,%d,%d,%d,%d,%d,%d,%s", + &span.start_ts, &span.duration, + &span.cpu, &span.pid, &span.rpcid, + &span.eventnum, &span.arg, &span.retval, &span.ipc, + name_buffer); + if (n != 10) {continue;} + + if (span.eventnum == tx_pkt_event) { + if (tcp_rpc[span.rpcid].ts == 0.0) { // first time only + tcp_rpc[span.rpcid].ts = span.start_ts; +fprintf(stdout, "tcp_rpc[%d] = %8.6f + %8.6f\n", span.rpcid, span.start_ts, tcp_basetime); + } + } + } + fclose(tcp); + + // See what we have + //--------------------------------------------------------------------------// + bool fail = false; + if (ku_basetime == 0.0) { + fprintf(stderr, "kutrace has no basetime\n"); + fail = true; + } + if (tcp_basetime == 0.0) { + fprintf(stderr, "tcpdump has no basetime\n"); + fail = true; + } + if (600 < abs(ku_basetime - tcp_basetime)) { + fprintf(stderr, "kutrace and tcpdump basetimes differ by more than 10 minutes:\n"); + fprintf(stderr, " kutrace %s\n", ku_basetime_str); + fprintf(stderr, " 
tcpdump %s\n", tcp_basetime_str); + fail = true; + } + if (fail) {exit(0);} + + // Map tcp times to 5 usec after the ku write() starting times + //--------------------------------------------------------------------------// + int k = 0; + XYPair pair[65536]; + for (int rpcid = 0; rpcid < 65536; ++rpcid) { + if ((ku_rpc[rpcid].ts != 0.0) && (tcp_rpc[rpcid].ts != 0.0)) { + // Map tpc time to the ku start minute + pair[k].x = tcp_rpc[rpcid].ts + (ku_basetime - tcp_basetime); + // Record the incoming ku-tcp offset from write + 5 usec + pair[k].y = (ku_rpc[rpcid].ts + write_to_rpc_delay) - tcp_rpc[rpcid].ts; +fprintf(stdout, " [%d] diffs[%d] = %8.6f (%8.6f - %8.6f)\n", k, rpcid, pair[k].y, ku_rpc[rpcid].ts + write_to_rpc_delay, pair[k].x); + ++k; + } + } + fprintf(stderr, "%d pair matches found\n", k); + + // Fit #1 + //--------------------------------------------------------------------------// + Fit fit; + get_fit(pair, k, &fit); + print_fit(stderr, fit); + + // Fit #2 + //--------------------------------------------------------------------------// + // The fit may well be biased by outlier (typically late packet transmission) + // times, so redo the fit chopping off anything beyond +/- 100 usec + int k2 = 0; + XYPair pair2[65536]; + for (int kk = 0; kk < k; ++kk) { + // if remap moves original offset to perfect alignment, diff = 0; + double diff = pair[kk].y - remap(pair[kk].x, fit); + if (max_fitted_diff < fabs(diff)) {continue;} // Too far away; ignore + pair2[k2++] = pair[kk]; + } + fprintf(stderr, "%d pair2 matches found\n", k2); + + // If we retained at least half the points, re-fit, else leave it alone + if (k <= (k2 * 2)) { + get_fit(pair2, k2, &fit); + print_fit(stderr, fit); + } + + // Write the old and new offsets for a json file + //--------------------------------------------------------------------------// + for (int kk = 0; kk < k; ++kk) { + fprintf(stdout, "[%10.6f, %f, %f],\n", pair[kk].x, pair[kk].y, remap(pair[kk].x, fit)); + } + + // Read the tcp 
file and write new aligned one + //--------------------------------------------------------------------------// + FILE* tcp2 = fopen(tcpdump_name, "r"); + if (tcp2 == NULL) {fprintf(stderr, "%s did not open\n", tcpdump_name); exit(0);} + + FILE* out = fopen(out_name.c_str(), "w"); + if (out == NULL) {fprintf(stderr, "%s did not open\n", out_name.c_str()); exit(0);} + + // Read the tcpdump times, remap, and write aligned file + while (ReadLine(tcp2, buffer, kMaxBufferSize)) { + // Copy everything that is not a span + if (buffer[0] != '[') { + fprintf(out, "%s\n", buffer); + continue; + } + + int n = sscanf(buffer, "[%lf,%lf,%d,%d,%d,%d,%d,%d,%d,%s", + &span.start_ts, &span.duration, + &span.cpu, &span.pid, &span.rpcid, + &span.eventnum, &span.arg, &span.retval, &span.ipc, + name_buffer); + if (n != 10) { + fprintf(out, "%s\n", buffer); + continue; + } + + // Align the time, except for the 999.0 end marker + if (span.start_ts != 999.0) { + double old_ts = span.start_ts; + span.start_ts += remap(span.start_ts, fit); +//fprintf(stdout, "[%d] %f ==> %f (%f)\n", span.rpcid, old_ts, span.start_ts, adjust_offset); +//fprintf(stdout, [%f, %f, %f],\n", old_ts, span.start_ts, adjust_offset); + + } + + fprintf(out, "[%12.8f, %10.8f, %d, %d, %d, %d, %d, %d, %d, %s\n", + span.start_ts, span.duration, + span.cpu, span.pid, span.rpcid, + span.eventnum, span.arg, span.retval, span.ipc, + name_buffer); + } + fclose(tcp2); + fclose(out); + + fprintf(stderr, " %s written\n", out_name.c_str()); + return 0; +} + + diff --git a/book-user-code/time_getpid.cc b/book-user-code/time_getpid.cc new file mode 100644 index 000000000000..3b042ff66fdf --- /dev/null +++ b/book-user-code/time_getpid.cc @@ -0,0 +1,162 @@ +// Little program to time the KUtrace overhead of the shortest system call +// and of the kutrace::mark_a call. Keep in mind the distortion that can happen +// if run on an idle machine with power-saving slow CPU clocks. 
+//
+// Remember that every call creates TWO KUtrace events, so divide the
+// time difference by two after running without, with go, with goipc.
+
+// Copyright 2021 Richard L. Sites
+
+// Compile with g++ -O2 time_getpid.cc kutrace_lib.cc -o time_getpid
+
+// Do 100k getpid() calls
+// so we can time these with and without tracing to see the tracing overhead
+// dsites 2016.10.13
+//
+// Intel(R) Celeron(R) CPU G1840 @ 2.80GHz
+//   no trace   52ns
+//   go         82ns (15ns per event)
+//   goipc     126ns (37ns per event, 2.5x more trace overhead)
+//
+//
+
+
+// 2018.01.27 After re-mounting heat sink
+// Intel(R) Core(TM) i3-7100 CPU @ 3.90GHz
+// no trace
+//   100000 calls to getpid() took 7992 us (79,72,73,74,74 ns each) [min 72]
+//   100000 calls to mark_a took 3956 us (39,39,39,39,39 ns each) [min 39]
+// trace
+//   100000 calls to getpid() took 10162 us (101,100,96,94,96 ns each) [min 94] +22ns/pair
+//   100000 calls to mark_a took 5411 us (54,54,54,54,54 ns each) [min 54] +15ns/single
+// trace with IPC
+//   100000 calls to getpid() took 13208 us (132,130,129,134,130 ns each) [min 129] +57ns/pair
+//   100000 calls to mark_a took 7235 us (72,73,73,73,73 ns each) [min 72] +33ns/single
+
+// 2020.03.21 baseline RaspberryPi 4B unpatched. These go thru the C runtime library
+// while the above use the inline asm version
+//   100000 calls to getpid() took 84965 us (849 ns each) 849, 727, 849, 664, 719, 675, 849, 692, 854
+//   100000 calls to mark_a took 39365 us (393 ns each)
+//   100000 calls to getpid() took 72724 us (727 ns each)
+//   100000 calls to mark_a took 37194 us (371 ns each)
+// difference appears to be related to clockrate after warmup
+// ./time_getpid & ./time_getpid
+//   100000 calls to getpid() took 68860 us (688 ns each) 688, 848, 779
+//   100000 calls to mark_a took 37415 us (374 ns each)
+//   100000 calls to getpid() took 68953 us (689 ns each)
+//   100000 calls to mark_a took 39218 us (392 ns each)
+
+
+// NOTE(review): the four system header names were missing from this copy of
+// the patch; they are reconstructed from what the code below uses (printf,
+// syscall, struct timeval) -- confirm against the upstream file.
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <unistd.h>     // syscall
+#include <sys/time.h>   // gettimeofday
+#include "basetypes.h"
+
+#include "kutrace_lib.h"
+#include "timecounters.h"
+
+// On ARM-32 /usr/include/arm-linux-gnueabihf/asm/unistd-common.h
+// On ARM-64 linux/arch/arm64/include/asm/unistd32.h
+// On x86 /usr/include/x86_64-linux-gnu/asm/unistd_64.h
+
+#if defined(__aarch64__)
+#define __NR_getpid 172
+#elif defined(__ARM_ARCH_ISA_ARM)
+#define __NR_getpid 20
+#elif defined(__x86_64__)
+#define __NR_getpid 39
+#else
+// BUILD_BUG_ON_MSG is a kernel-only macro; in a user program the portable way
+// to stop the build with a message is #error.
+#error "Define __NR_getpid for your architecture"
+#endif
+
+
+// Useful utility routines
+//int64 inline GetUsec() {struct timeval tv; gettimeofday(&tv, NULL);
+//                        return (tv.tv_sec * 1000000l) + tv.tv_usec;}
+
+// Issue one getpid system call and return its result.
+// getpid doesn't actually have any arguments, but I want to compare time to two-arg gettimeofday
+// so two dummy arguments are passed along with the call.
+inline int64 DoGP(struct timeval* arg1, void* arg2)
+{
+#if defined(__ARM_ARCH_ISA_ARM) && !defined(__aarch64__)
+  // 32-bit ARM: syscall number in r7, arguments in r0/r1, result back in r0
+  register uint32 _arg1 asm("r0") = (uint32)arg1;
+  register uint32 _arg2 asm("r1") = (uint32)arg2;
+  register uint32 ret asm ("r0");
+  register uint32 nr asm("r7") = __NR_getpid;
+
+  asm volatile(
+  " swi #0\n"
+  : "=r" (ret)
+  : "r" (_arg1), "r" (_arg2), "r" (nr)
+  : "memory");
+
+  return ret;
+
+#else
+
+  // Everything else goes through the C library syscall() wrapper
+  int64 retval;
+  retval = syscall(__NR_getpid, arg1, arg2);
+  return retval;
+
+#endif
+
+// #if defined(__x86_64__)
+//   asm volatile
+//   (
+//     "syscall"
+//     : "=a" (retval)
+//     : "0"(__NR_getpid), "D"(arg1), "S"(arg2)
+//     : "cc", "rcx", "r11", "memory"
+//   );
+
+}
+
+// Time 100000 getpid() calls and 100000 kutrace::mark_a() calls and print
+// the elapsed microseconds and the nanoseconds per call for each.
+// Each loop is unrolled four times to keep loop overhead small.
+int main (int argc, const char** argv) {
+  int64 bogus = 0;
+
+  // First warm up, to get the CPU clock up to speed
+  // No timing here
+  for (int i = 0; i < 50000 / 4; ++ i) {
+    struct timeval tv;
+    bogus += DoGP(&tv, NULL);
+    bogus += DoGP(&tv, NULL);
+    bogus += DoGP(&tv, NULL);
+    bogus += DoGP(&tv, NULL);
+  }
+
+  int64 start_usec = GetUsec();
+  for (int i = 0; i < 100000 / 4; ++ i) {
+    struct timeval tv;
+    bogus += DoGP(&tv, NULL);
+    bogus += DoGP(&tv, NULL);
+    bogus += DoGP(&tv, NULL);
+    bogus += DoGP(&tv, NULL);
+  }
+  int64 stop_usec = GetUsec();
+
+  // Keep bogus as a live variable so the calls above cannot be optimized away
+  if (stop_usec == 0) {printf("bogus %d\n", (int)bogus);}
+
+
+  // Now time marker inserts
+  int64 start_usec2 = GetUsec();
+  for (int i = 0; i < 100000 / 4; ++ i) {
+    kutrace::mark_a("hello");
+    kutrace::mark_a("hello");
+    kutrace::mark_a("hello");
+    kutrace::mark_a("hello");
+  }
+  int64 stop_usec2 = GetUsec();
+
+
+  // Print last to avoid printing extending timing
+  // usec for 100000 calls -> nsec per call is (delta * 1000) / 100000 = delta / 100
+  int delta = stop_usec - start_usec;
+  fprintf(stdout, "100000 calls to getpid() took %d us (%d ns each)\n", delta, delta / 100);
+  fprintf(stdout, " Note that each call generates TWO KUtrace events\n");
+
+  int delta2 = stop_usec2 - start_usec2;
+  fprintf(stdout, "100000 calls to mark_a took %d us (%d ns each)\n", delta2, delta2 / 100);
+
+  return 0;
+}
diff --git a/book-user-code/timealign.cc b/book-user-code/timealign.cc
new file mode 100644
index 000000000000..f869f5cbee63
--- /dev/null
+++ b/book-user-code/timealign.cc
@@ -0,0 +1,652 @@
+// timealign.cc
+// Little program to align two or more RPC logs
+// Copyright 2021 Richard L.
Sites
+//
+// compile with g++ -O2 timealign.cc dclab_log.cc dclab_rpc.cc kutrace_lib.cc -o timealign
+
+// NOTE(review): the four system header names and the map<> template arguments
+// further down were missing from this copy of the patch; they are
+// reconstructed from how the code uses them -- confirm against upstream.
+#include <stdio.h>
+#include <string.h>
+
+#include <map>
+#include <string>
+
+#include "basetypes.h"
+#include "dclab_log.h"
+#include "dclab_rpc.h"
+
+using std::map;
+using std::string;
+
+// Assumed Ethernet speed in gigabits per second
+static const int64 kGbs = 1;
+
+// Assumed RPC message overhead, in addition to pure data
+static const int64 kMsgOverheadBytes = 100;
+
+// Assumed time for missing transmission or server time, in usec
+static const int kMissingTime = 2;
+
+static const int kBucketNum = 8;
+static const int64 kEmptyBucket = 999999999;
+
+// 2**0.0 through 2**0.9
+static const double kPowerTwoTenths[10] = {
+  1.0000, 1.0718, 1.1487, 1.2311, 1.3195,
+  1.4142, 1.5157, 1.6245, 1.7411, 1.8661
+};
+
+// This keeps track of one time-offset entry between clocks on two machines
+// These ALWAYS refer to mapping server time to client time
+typedef struct {
+  int64 slop;
+  int64 tfrom;
+  int64 deltamin;
+  int64 deltamax;
+} Bucket;
+
+// This records the alignment parameters to map time offsets for one machine pair
+//   delta_yi = b + m * (yi - y0)      // The thing to add to yi to get yi'
+//   yi' = yi + delta_yi
+// To backmap
+//   yi' = yi + (b + m * (yi - y0))
+//   yi = yi' - (b + m * (yi - y0))
+//   delta_yi' = - (b + m * (yi - y0)) // The thing to add to yi' to get yi
+//   delta_yi' = - (b + m * ((yi' + delta_yi') - y0))
+//   delta_yi' = - (b + m * yi' + m * delta_yi' - m * y0)
+//   delta_yi' = - b - m * yi' - m * delta_yi' + m * y0
+//   delta_yi' + m * delta_yi' = -b - m * (yi' - y0)
+//   delta_yi' * (1 + m) = -b - m * (yi' - y0)
+//   delta_yi' = (-b - m * (yi' - y0)) / (1 + m)
+//   delta_yi' = -b / (1 + m) - (m / (1 + m)) * (yi' - y0)
+//   yi = yi' + delta_yi'
+//
+// This could either reflect mapping client to server or server to client, or
+// either one to the time of some third machine
+typedef struct {
+  int64 y0;     // usec since the epoch
+  double m;     // slope of clock frequency difference, likely near zero
+  double b;     // clock offset at y0, in usec
+} Alignment;
+
+// This keeps track of one small array of time offsets for one machine pair,
+// plus the calculated time-alignment parameters
+typedef struct {
+  int64 entrynum;
+  int64 bucket_shift;
+  bool time_mapping_assigned;   // Final mappings to some base ip assigned
+  Bucket buckets[kBucketNum];
+  Alignment t14_alignment;      // Map client times to some base ip time (initially identity)
+  Alignment t23_alignment;      // Map server times to some base ip time (initially this client)
+} BucketStruct;
+
+// This keeps track of arrays of time offsets and alignments for multiple machine pairs
+//
+// The key is (machine1_ip << 32) | machine2_ip with the smaller
+// ip value as machine1 (canonical form)
+//
+// Time alignments will eventually map all times for all machines to
+// the lowest ip address encountered
+//
+typedef map<uint64, BucketStruct> BucketMap;
+typedef map<uint32, Alignment> IpToAlignment;
+
+// Shared result buffer for datetostr(); each call overwrites the previous result
+static char datebuf[64];
+
+// Return the larger of a and b
+int64 imax(int64 a, int64 b) {return (a >= b) ? a : b;}
+
+// Format the time-of-day part of timeusec (usec since the epoch) as
+// hh:mm:ss.uuuuuu. Returns a pointer to the static datebuf, so the result
+// must be consumed before the next call.
+const char* datetostr(int64 timeusec) {
+  int hr = (timeusec / 3600000000LL) % 24;
+  int min = (timeusec / 60000000) % 60;
+  int sec = (timeusec / 1000000) % 60;
+  int usec = timeusec % 1000000;
+  snprintf(datebuf, sizeof(datebuf), "%02d:%02d:%02d.%06d", hr, min, sec, usec);
+  return datebuf;
+}
+
+// Print one alignment: base time, offset in usec, slope in usec per second
+void DumpAlignment(FILE* f, const Alignment* alignment) {
+  fprintf(f, "y0 %s offset %5.1fus slope %5.2fus/sec\n",
+          datetostr(alignment->y0), alignment->b, alignment->m * 1000000.0);
+}
+
+// Print both alignments of one machine pair
+void DumpAlignments(FILE* f, const BucketStruct* cur_pair) {
+  fprintf(f, " t14_alignment "); DumpAlignment(f, &cur_pair->t14_alignment);
+  fprintf(f, " t23_alignment "); DumpAlignment(f, &cur_pair->t23_alignment);
+}
+
+// Mark buckets[cur_bucket..kBucketNum) as empty
+void InitBuckets(int64 cur_bucket, Bucket* buckets) {
+  for (int i = cur_bucket; i < kBucketNum; ++i) {
+    buckets[i].slop = kEmptyBucket;
+    buckets[i].tfrom = 0;
+    buckets[i].deltamin = 0;
+    buckets[i].deltamax = 0;
+  }
+}
+
+// Set alignment to the identity mapping (zero offset, zero slope)
+void InitAlignment(Alignment* alignment) {
+  alignment->y0 = 0;
+  alignment->m = 0.0;
+  alignment->b = 0.0;
+}
+
+// Reset one machine pair: no entries, all buckets empty, identity alignments
+void InitBucketStruct(BucketStruct* cur_pair) {
+  cur_pair->entrynum = 0;
+  cur_pair->bucket_shift = 0;
+  cur_pair->time_mapping_assigned = false;
+  InitBuckets(0, cur_pair->buckets);
+  InitAlignment(&cur_pair->t14_alignment);
+  InitAlignment(&cur_pair->t23_alignment);
+}
+
+// Print all kBucketNum buckets; the last number is the midpoint of deltamin..deltamax
+void DumpBuckets(FILE* f, const Bucket* buckets) {
+  fprintf(f, "\nDumpbuckets\n");
+  for (int i = 0; i < kBucketNum; ++i) {
+    fprintf(f, "[%d] slop/tfrom/delta %s %lld %lld %lld..%lld = %lld\n",
+            i, datetostr(buckets[i].tfrom),
+            buckets[i].slop, buckets[i].tfrom,
+            buckets[i].deltamin, buckets[i].deltamax,
+            (buckets[i].deltamin + buckets[i].deltamax) / 2);
+  }
+}
+
+// Print one machine pair; buckets and alignments only if it has more than one entry
+void DumpBucketStruct(FILE* f, const BucketStruct* cur_pair) {
+  fprintf(f, "\nDumpbucketStruct\n");
+  fprintf(f, " entrynum %lld\n", cur_pair->entrynum);
+  fprintf(f, " bucket_shift %lld\n", cur_pair->bucket_shift);
+  fprintf(f, " time_mapping_assigned %d\n", cur_pair->time_mapping_assigned);
+  if (1 < cur_pair->entrynum) {
+    DumpBuckets(f, cur_pair->buckets);
+    DumpAlignments(f, cur_pair);
+  }
+}
+
+// return 2 ** (x/10), truncated to an integer
+int64 ExpTenths(uint8 x) {
+  int64 powertwo = x / 10;
+  int64 fraction = x % 10;
+  int64 retval = static_cast<int64>(1) << powertwo;
+  retval *= kPowerTwoTenths[fraction];
+  return retval;
+}
+
+// Return usec to transmit x bytes at kGbs Gb/s, where 1 Gb/s = 125000000 B/sec
+// but we assume we only get about 95% of this for real data, so 120 B/usec
+// per Gb/s. A faster link takes less time, so kGbs belongs in the divisor
+// (identical result for kGbs == 1).
+int64 BytesToUsec(int64 x) {
+  int64 retval = x / (120 * kGbs);
+  return retval;
+}
+
+// Estimated transmission time in usec for a message whose length is given
+// as ten times its log base 2 (lglen), plus the fixed message overhead
+int64 RpcMsglglenToUsec(uint8 lglen) {
+  return BytesToUsec(ExpTenths(lglen) + kMsgOverheadBytes);
+}
+
+// Given alignment parameters x2y, calculate y to x
+// See algebra up at the top
+void InvertAlignment(const Alignment* xtoy, Alignment* ytox) {
+  // Read the source slope once so that the result is still right if
+  // ytox happens to alias xtoy
+  const double one_plus_m = 1.0 + xtoy->m;
+  const double inv_m = -xtoy->m / one_plus_m;
+  const double inv_b = -xtoy->b / one_plus_m;
+  ytox->y0 = xtoy->y0;
+  ytox->m = inv_m;
+  ytox->b = inv_b;
+fprintf(stdout, " Invert xtoy "); DumpAlignment(stdout, xtoy);
+fprintf(stdout, " ytox "); DumpAlignment(stdout, ytox);
+}
+
+// Given alignment parameters x2y, and ytoz, calculate xtoz
+// Algebra here:
+//   t1' = t1 + xtoy->m * (t1 - xtoy->y0) + xtoy->b
+//   t1'' = t1' + ytoz->m * (t1' - ytoz->y0) + ytoz->b
+//
+//   t1'' = t1 + (xtoy->m * (t1 - xtoy->y0) + xtoy->b) +
+//     ytoz->m * ((t1 + xtoy->m * (t1 - xtoy->y0) + xtoy->b) - ytoz->y0) +
+//     ytoz->b
+//   t1'' = t1 + (xtoy->m * t1 - xtoy->m * xtoy->y0 + xtoy->b) +
+//     ytoz->m * (t1 + xtoy->m * t1 - xtoy->m * xtoy->y0 + xtoy->b - ytoz->y0) +
+//     ytoz->b
+//   t1'' = t1 + (xtoy->m * t1 - xtoy->m * xtoy->y0 + xtoy->b) +
+//     (ytoz->m * t1 + ytoz->m * xtoy->m * t1 - ytoz->m * xtoy->m * xtoy->y0 + ytoz->m * xtoy->b - ytoz->m * ytoz->y0) +
+//     ytoz->b
+//
+// We want
+//   t1'' = t1 + xtoz->m * (t1 - xtoz->y0) + xtoz->b
+//
+// So
+//   xtoz->m = (xtoy->m + ytoz->m + ytoz->m * xtoy->m)
+//   xtoz->y0 = (xtoy->m * xtoy->y0 + ytoz->m * xtoy->m * xtoy->y0 + ytoz->m * ytoz->y0) / xtoz->m
+//   xtoz->b = (xtoy->b + ytoz->m * xtoy->b + ytoz->b)
+//
+//
Update-in-place OK if xtoz is one of the other two mappings
+void MergeAlignment(const Alignment* xtoy, const Alignment* ytoz, Alignment* xtoz) {
+  // Build the result in a temporary so xtoz may alias either input
+  Alignment temp;
+  temp.m = xtoy->m + ytoz->m + ytoz->m * xtoy->m;
+  if (temp.m == 0.0) {
+    // No slope at all: the base time is irrelevant, and this avoids 0/0
+    temp.y0 = 0;
+  } else {
+    temp.y0 = (xtoy->m * xtoy->y0 + ytoz->m * xtoy->m * xtoy->y0 + ytoz->m * ytoz->y0) / temp.m;
+  }
+  temp.b = xtoy->b + ytoz->b + ytoz->m * xtoy->b;
+fprintf(stdout, " Merge xtoy "); DumpAlignment(stdout, xtoy);
+fprintf(stdout, " ytoz "); DumpAlignment(stdout, ytoz);
+fprintf(stdout, " xtoz "); DumpAlignment(stdout, &temp);
+  *xtoz = temp;
+}
+
+
+// From one set of buckets, calculate best fit line for delta_yi,
+// to turn server times into client times.
+// If the server_is_smaller_ip, invert this mapping and make it
+// turn client times into server times.
+// In both cases, the remapped times will be in the time domain of the smaller ip address
+//
+// The result is written to cur_pair->t23_alignment. The fitted offsets and a
+// cross-check through the inverted mapping are printed to stdout.
+void Fit(BucketStruct* cur_pair) {
+  const Bucket* buckets = cur_pair->buckets;
+  Alignment* alignment = &cur_pair->t23_alignment;
+
+//VERYTEMP
+if (1 < cur_pair->entrynum) {DumpBuckets(stdout, buckets);}
+
+  // Running sums for a least-squares line through (tfrom, mid-delta)
+  double n = 0.0;
+  double x = 0.0;
+  double y = 0.0;
+  double xy = 0.0;
+  double xx = 0.0;
+  // Make base time the best fit in the first bucket
+  ////int64 basetime = (buckets[0].tfrom / 60000000) * 60000000;
+  int64 basetime = buckets[0].tfrom;
+  // Later: try weighted sums or just double to 16 buckets
+  for (int i = 0; i < kBucketNum; ++i) {
+    if (buckets[i].slop == kEmptyBucket) {continue;}
+    double xi = buckets[i].tfrom - basetime;
+    double yi = (buckets[i].deltamin + buckets[i].deltamax) / 2.0;
+    n += 1.0;
+    x += xi;
+    y += yi;
+    xy += xi * yi;
+    xx += xi * xi;
+  }
+
+  if (n != 0.0) {
+    alignment->y0 = basetime;
+    // With a single bucket, or all buckets at the same tfrom, the denominator
+    // is zero and the slope would be NaN/inf. No slope can be measured in
+    // that case, so use zero slope and the mean offset.
+    double denom = n * xx - x * x;
+    alignment->m = (denom != 0.0) ? ((n * xy - x * y) / denom) : 0.0;
+    alignment->b = (y - alignment->m * x) / n;
+  } else {
+    alignment->y0 = 0;
+    alignment->m = 0.0;
+    alignment->b = 0.0;
+  }
+
+  // Show the fitted offset at each occupied bucket
+  for (int i = 0; i < kBucketNum; ++i) {
+    if (buckets[i].slop == kEmptyBucket) {continue;}
+    double xi = buckets[i].tfrom - alignment->y0;
+    ////double yi = (buckets[i].deltamin + buckets[i].deltamax) / 2.0;
+    double delta_yi = alignment->m * xi + alignment->b;
+    fprintf(stdout, "%6.1f ", delta_yi);
+  }
+  fprintf(stdout, "\n");
+
+  // Cross-check by looking at the reverse mapping
+  Alignment temp;
+  InvertAlignment(alignment, &temp);
+
+  for (int i = 0; i < kBucketNum; ++i) {
+    if (buckets[i].slop == kEmptyBucket) {continue;}
+    double xi = buckets[i].tfrom - temp.y0;
+    ////double yi = (buckets[i].deltamin + buckets[i].deltamax) / 2.0;
+    double delta_yi = temp.m * xi + temp.b;
+    double xi_prime = xi + delta_yi;
+    ////double delta_yi_prime = (temp.m * xi_prime + temp.b) / (1.0 - temp.m);
+    double delta_yi_prime = temp.m * xi_prime + temp.b;
+    fprintf(stdout, "%6.1f ", delta_yi_prime);
+  }
+  fprintf(stdout, "\n");
+}
+
+// Produce fname_minus_.xxx || s || .xxx
+// i.e. insert s in front of the last '.' of fname, or append s if there is no '.'
+string FnameAppend(const char* fname, const char* s) {
+  const char* period = strrchr(fname, '.');
+  if (period == NULL) {period = fname + strlen(fname);}
+  int len = period - fname;
+  string retval = string(fname, len);
+  retval += s;
+  retval += period;
+  return retval;
+}
+
+inline uint32 Uint32Min(uint32 a, uint32 b) {return (a < b) ? a : b;}
+inline uint32 Uint32Max(uint32 a, uint32 b) {return (a > b) ? a : b;}
+
+// Print the four timestamps t1..t4 of one log record
+void DumpLR(const char* label, BinaryLogRecord* lr) {
+  fprintf(stdout, "%s %lld %lld %lld %lld\n", label,
+          lr->req_send_timestamp, lr->req_rcv_timestamp,
+          lr->resp_send_timestamp, lr->resp_rcv_timestamp);
+}
+
+
+// Handle extracting offsets from one log file at a time
+// For each (client_ip, server_ip) pair seen, keep up to kBucketNum buckets,
+// each holding the lowest-slop RPC of its slice of the log. When the buckets
+// fill up, adjacent pairs are merged so each bucket covers twice as many entries.
+void Pass1(const char* fname, BucketMap* bucketmap) {
+  fprintf(stdout, "\nPass1: %s\n", fname);
+
+  FILE* logfile = fopen(fname, "rb");
+  if (logfile == NULL) {
+    fprintf(stderr, "%s did not open\n", fname);
+    return;
+  }
+
+  BinaryLogRecord lr;
+  while(fread(&lr, sizeof(BinaryLogRecord), 1, logfile) != 0) {
+////DumpLR("Pass1", &lr);
+
+    // Skip unless at least t3 is there
+    if (lr.resp_send_timestamp == 0) {continue;}
+
+    // Estimated network transmission times
+    int64 est_req_usec = RpcMsglglenToUsec(lr.lglen1);
+    int64 est_resp_usec = RpcMsglglenToUsec(lr.lglen2);
+
+    // Fill in any missing times (incomplete RPCs)
+    // Missing t2 etc. must include estimated transmission time
+    // Times in usec
+    if (lr.req_rcv_timestamp == 0) {
+      lr.req_rcv_timestamp = lr.req_send_timestamp + est_req_usec + kMissingTime;
+    }
+    // t3 is known to be nonzero here because of the skip above; kept for symmetry
+    if (lr.resp_send_timestamp == 0) {
+      lr.resp_send_timestamp = lr.req_rcv_timestamp + kMissingTime;
+    }
+    if (lr.resp_rcv_timestamp == 0) {
+      lr.resp_rcv_timestamp = lr.req_send_timestamp +
+        (lr.resp_send_timestamp - lr.req_rcv_timestamp) +
+        est_req_usec + kMissingTime + est_resp_usec + kMissingTime;
+    }
+////DumpLR(" ", &lr);
+
+
+    int64 t1 = lr.req_send_timestamp;
+    int64 t2 = lr.req_rcv_timestamp;
+    int64 t3 = lr.resp_send_timestamp;
+    int64 t4 = lr.resp_rcv_timestamp;
+
+    // Only look at complete RPCs
+    if (t4 == 0) {continue;}
+
+    uint64 map_key = (static_cast<uint64>(lr.client_ip) << 32) | lr.server_ip;
+    BucketMap::iterator it = bucketmap->find(map_key);
+    if (it == bucketmap->end()) {
+      //New machine pair
+      BucketStruct temp;
+      InitBucketStruct(&temp);
+      (*bucketmap)[map_key] = temp;
+      it = bucketmap->find(map_key);
+//fprintf(stdout, "New pair: ip %08x %08x t1-t4 %lld %lld %lld %lld\n",
+//lr.client_ip, lr.server_ip, t1, t2, t3, t4);
+    }
+    BucketStruct* cur_pair = &it->second;
+
+    // Slop is the part of the round trip not accounted for by server time
+    // or by the estimated transmission times
+    int64 slop = (t4 - t1) - (t3 - t2) - est_req_usec - est_resp_usec;
+
+// Do not want degenerate missing t3 and t4 to make tiny always-win slop
+
+
+    // Arithmetic is wonky if zero or negative slop factored in
+    if (slop < 2) slop = 2;   // Two usec minimum slop
+
+    int cur_bucket = cur_pair->entrynum >> cur_pair->bucket_shift;
+    if (cur_bucket >= kBucketNum) {
+//fprintf(stdout, " Halve buckets\n");
+      // Compress into half as many buckets
+      // DumpBuckets(stdout, cur_pair->buckets);
+      for (int i = 0; i < (kBucketNum >> 1); ++i) {
+        // Keep lower slop of a pair
+        if (cur_pair->buckets[2 * i].slop <= cur_pair->buckets[2 * i + 1].slop) {
+          cur_pair->buckets[i] = cur_pair->buckets[2 * i];
+        } else {
+          cur_pair->buckets[i] = cur_pair->buckets[2 * i + 1];
+        }
+      }
+      ++cur_pair->bucket_shift;
+      cur_bucket = cur_pair->entrynum >> cur_pair->bucket_shift;
+      InitBuckets(cur_bucket, cur_pair->buckets);
+    }
+
+    if (slop < cur_pair->buckets[cur_bucket].slop) {
+//fprintf(stdout, " [%d] slop %lld @ %s\n", cur_bucket, slop, datetostr(t1));
+      int64 deltamin = (t1 - t2) + est_req_usec;
+      int64 deltamax = (t4 - t3) - est_resp_usec;
+      if (deltamin >= deltamax) {
+        // Oops, they crossed
+        int64 mid = (deltamin + deltamax) / 2;
+        deltamin = mid - 1;
+        deltamax = mid + 1;
+      }
+      cur_pair->buckets[cur_bucket].slop = slop;
+      cur_pair->buckets[cur_bucket].tfrom = (t2 + t3) / 2;
+      cur_pair->buckets[cur_bucket].deltamin = deltamin;
+      cur_pair->buckets[cur_bucket].deltamax = deltamax;
+    }
+
+    ++cur_pair->entrynum;
+  }
+
+  fclose(logfile);
+}
+
+// For each endpoint pair in the bucket map, calculate best fit line for delta_yi
+void CalculateFits(BucketMap* bucketmap) {
+  for (BucketMap::iterator it = bucketmap->begin(); it != bucketmap->end(); ++it) {
+    uint64 map_key = it->first;
+    uint32 client_ip = map_key >> 32;
+    uint32 server_ip = map_key & 0x00000000ffffffffl;
+    BucketStruct* cur_pair = &it->second;
+
+    fprintf(stdout, "\nCalculateFits %08x <== %08x\n", client_ip, server_ip);
+
+    Fit(cur_pair);
+
+    DumpAlignments(stdout, cur_pair);
+  }
+}
+
+// Find a client or server ip that is unmapped (to use it as the next base_ip)
+// Returns 0 when every ip of every not-yet-assigned pair already has a mapping
+uint32 FindUnmappedIp(const BucketMap* bucketmap, const IpToAlignment* iptoalignment) {
+  for (BucketMap::const_iterator it = bucketmap->begin(); it != bucketmap->end(); ++it) {
+    uint64 map_key = it->first;
+    uint32 client_ip = map_key >> 32;
+    uint32 server_ip = map_key & 0x00000000ffffffffl;
+    const BucketStruct* cur_pair = &it->second;
+    if (cur_pair->time_mapping_assigned) {continue;}
+    // Return an unmapped ip.
+
+    if (iptoalignment->find(client_ip) == iptoalignment->end()) {return client_ip;}
+    if (iptoalignment->find(server_ip) == iptoalignment->end()) {return server_ip;}
+  }
+  return 0;
+}
+
+// True if ip already has an alignment to some base ip
+bool IsMapped(const IpToAlignment* iptoalignment, uint32 ip) {
+  return iptoalignment->find(ip) != iptoalignment->end();
+}
+
+// Transitively update alignments to map all times for all machines to
+// time on the lowest ip address encountered
+// TODO: use map to pick off all machine pairs
+//
+// we have a collection of lo<=hi time mappings
+// we want to change them so that they all map to the lowest
+//   lowest <= A
+//   lowest <= B
+//   lowest <= C, etc.
+// mark any that already map to lowest, then iteratively
+// find Y s.t.
Y does not map to lowest and Y maps to X and X maps to lowest
+// remap Y => X and X => lowest so that Y => lowest
+// Iterate until all map to lowest or no change (some disconnected machines)
+// either start over with lowest unmapped, or comment and stop
+//
+void TransitiveAlignment(BucketMap* bucketmap) {
+  // Create a time mapping for each ip address
+  IpToAlignment iptoalignment;
+  uint32 base_ip;
+  while ((base_ip = FindUnmappedIp(bucketmap, &iptoalignment)) != 0) {
+    // Add identity mapping base_ip => base_ip
+fprintf(stdout, "\nTransitiveAlignment, base is %08x\n", base_ip);
+    Alignment identity;
+    InitAlignment(&identity);
+    iptoalignment[base_ip] = identity;
+fprintf(stdout, " iptoalignment[%08x] ", base_ip); DumpAlignment(stdout, &identity);
+    // Sweep all pairs repeatedly until a sweep maps no new ip
+    bool changed;
+    do {
+      changed = false;
+      for (BucketMap::iterator it = bucketmap->begin(); it != bucketmap->end(); ++it) {
+        uint64 map_key = it->first;
+        uint32 client_ip = map_key >> 32;
+        uint32 server_ip = map_key & 0x00000000ffffffffl;
+        BucketStruct* cur_pair = &it->second;
+//if (1 < cur_pair->entrynum) {DumpBucketStruct(stdout, cur_pair);}
+        if (cur_pair->time_mapping_assigned) {continue;}
+        const bool client_mapped = IsMapped(&iptoalignment, client_ip);
+        const bool server_mapped = IsMapped(&iptoalignment, server_ip);
+        if (client_mapped && !server_mapped) {
+          // Have server==>client and client has known mapping to base
+          // Merge server==>client and client==>base into server==>base
+fprintf(stdout, " Align %08x <== %08x to %08x\n", client_ip, server_ip, base_ip);
+          const Alignment* tobase = &iptoalignment[client_ip];
+          MergeAlignment(&cur_pair->t14_alignment, tobase, &cur_pair->t14_alignment);
+          MergeAlignment(&cur_pair->t23_alignment, tobase, &cur_pair->t23_alignment);
+          iptoalignment[client_ip] = cur_pair->t14_alignment;
+fprintf(stdout, " iptoalignment[%08x] ", client_ip); DumpAlignment(stdout, &cur_pair->t14_alignment);
+          iptoalignment[server_ip] = cur_pair->t23_alignment;
+fprintf(stdout, " iptoalignment[%08x] ", server_ip); DumpAlignment(stdout, &cur_pair->t23_alignment);
+          cur_pair->time_mapping_assigned = true;
+          changed = true;
+          DumpAlignments(stdout, cur_pair);
+        } else if (server_mapped && !client_mapped) {
+          // Have server==>client and server has known mapping to base
+          // Invert server==>client to get client==>server
+          // Merge client==>server and server==>base into client==>base
+fprintf(stdout, " Align %08x ==> %08x to %08x\n", client_ip, server_ip, base_ip);
+          const Alignment* tobase = &iptoalignment[server_ip];
+          Alignment client_to_server;
+          InvertAlignment(&cur_pair->t23_alignment, &client_to_server);
+          InitAlignment(&cur_pair->t23_alignment);
+          MergeAlignment(&client_to_server, tobase, &cur_pair->t14_alignment);
+          MergeAlignment(&cur_pair->t23_alignment, tobase, &cur_pair->t23_alignment);
+          iptoalignment[client_ip] = cur_pair->t14_alignment;
+fprintf(stdout, " iptoalignment[%08x] ", client_ip); DumpAlignment(stdout, &cur_pair->t14_alignment);
+          iptoalignment[server_ip] = cur_pair->t23_alignment;
+fprintf(stdout, " iptoalignment[%08x] ", server_ip); DumpAlignment(stdout, &cur_pair->t23_alignment);
+          cur_pair->time_mapping_assigned = true;
+          changed = true;
+          DumpAlignments(stdout, cur_pair);
+        } else if (client_mapped && server_mapped) {
+          // Both ends were already mapped to base through other pairs. Reuse
+          // those mappings; otherwise this pair would keep its raw
+          // pair-relative fit and Pass2 would put its records on a different
+          // timeline from the same machines' records in other pairs.
+          cur_pair->t14_alignment = iptoalignment[client_ip];
+          cur_pair->t23_alignment = iptoalignment[server_ip];
+          cur_pair->time_mapping_assigned = true;
+          DumpAlignments(stdout, cur_pair);
+        }
+      }
+    } while (changed);
+  }
+}
+
+// Read and rewrite log files, updating all times
+// Output goes to the input name with "_align" inserted before its extension.
+// Records for an unknown ip pair are copied unchanged.
+void Pass2(const char* fname, const BucketMap* bucketmap) {
+  fprintf(stdout, "\nPass2: %s\n", fname);
+
+  FILE* logfile = fopen(fname, "rb");
+  if (logfile == NULL) {
+    fprintf(stderr, "%s did not open\n", fname);
+    return;
+  }
+
+  string newfname = FnameAppend(fname, "_align");
+  FILE* newlogfile = fopen(newfname.c_str(), "wb");
+  if (newlogfile == NULL) {
+    fprintf(stderr, "%s did not open\n", newfname.c_str());
+    fclose(logfile);    // Do not leak the input file on this early exit
+    return;
+  }
+
+  BinaryLogRecord lr;
+  while(fread(&lr, sizeof(BinaryLogRecord), 1, logfile) != 0) {
+
+    // Align times: look at two ip values, find alignment, remap all four times
+    int64 t1 = lr.req_send_timestamp;
+    int64 t2 = lr.req_rcv_timestamp;
+    int64 t3 = lr.resp_send_timestamp;
+    int64 t4 = lr.resp_rcv_timestamp;
+
+    uint64 map_key = (static_cast<uint64>(lr.client_ip) << 32) | lr.server_ip;
+    BucketMap::const_iterator it = bucketmap->find(map_key);
+    if (it == bucketmap->end()) {
+      //fprintf(stdout, "ERROR: unknown ip pair %08x %08x\n",
+      //        lr.client_ip, lr.server_ip);
+      fwrite(&lr, sizeof(BinaryLogRecord), 1, newlogfile);
+      continue;
+    }
+    const BucketStruct* cur_pair = &it->second;
+
+    // Do the remapping
+    // t1 and t4 are client-side times, t2 and t3 are server-side times
+    const Alignment* t14 = &cur_pair->t14_alignment;
+    const Alignment* t23 = &cur_pair->t23_alignment;
+
+    int64 delta_t1, delta_t2, delta_t3, delta_t4;
+    delta_t1 = t14->m * (t1 - t14->y0) + t14->b;
+    delta_t2 = t23->m * (t2 - t23->y0) + t23->b;
+    delta_t3 = t23->m * (t3 - t23->y0) + t23->b;
+    delta_t4 = t14->m * (t4 - t14->y0) + t14->b;
+
+    // Only update incoming nonzero values (zeros in incomplete transactions)
+    if (lr.req_send_timestamp == 0) {delta_t1 = 0;}
+    if (lr.req_rcv_timestamp == 0) {delta_t2 = 0;}
+    if (lr.resp_send_timestamp == 0) {delta_t3 = 0;}
+    if (lr.resp_rcv_timestamp == 0) {delta_t4 = 0;}
+
+    // Enforce that nonzero times are non-decreasing
+    if (lr.req_send_timestamp != 0) {
+      lr.req_send_timestamp = t1 + delta_t1;
+    }
+    if (lr.req_rcv_timestamp != 0) {
+      lr.req_rcv_timestamp = imax(t2 + delta_t2, lr.req_send_timestamp);
+    }
+    if (lr.resp_send_timestamp != 0) {
+      lr.resp_send_timestamp = imax(t3 + delta_t3, lr.req_rcv_timestamp);
+    }
+    if (lr.resp_rcv_timestamp != 0) {
+      lr.resp_rcv_timestamp = imax(t4 + delta_t4, lr.resp_send_timestamp);
+    }
+
+/////fprintf(stdout, "%lld t1 += %lld, t2 += %lld, t3 += %lld, t4 += %lld, server_ip = %08x\n",
+////t1, delta_t1, delta_t2, delta_t3, delta_t4, lr.server_ip);
+
+////DumpLR(" new", &lr);
+
+    fwrite(&lr, sizeof(BinaryLogRecord), 1, newlogfile);
+  }
+
+  fclose(logfile);
+  fclose(newlogfile);
+  fprintf(stderr, " %s written\n", newfname.c_str());
+}
+
+// True if a command-line argument is not an input log to process: either the
+// -all flag or a file that is itself the output of an earlier run
+static bool SkipArg(const char* arg) {
+  if (strcmp(arg, "-all") == 0) {return true;}
+  if (strstr(arg, "_align.log") != 0) {return true;}
+  return false;
+}
+
+
+// Usage: timealign [-all] logfile...
+// Pass1 over every log gathers offsets, the fits are calculated and chained
+// to a common base machine, then Pass2 rewrites every log with aligned times.
+int main(int argc, const char** argv) {
+  // Accept -all in any position. The previous code tested argv[2] and then
+  // skipped argv[1], so the first log was dropped and "-all" itself was
+  // opened as a log file.
+  bool dump_all = false;
+  int file_count = 0;
+  for (int i = 1; i < argc; ++i) {
+    if (strcmp(argv[i], "-all") == 0) {dump_all = true;} else {++file_count;}
+  }
+  (void)dump_all;   // Parsed but not yet used by anything below
+
+  if (file_count == 0) {
+    // NOTE(review): the argument placeholder in this message appears to have
+    // been lost from this copy of the patch; confirm against upstream.
+    fprintf(stderr, "Usage: timealign +\n");
+    return 0;
+  }
+
+  BucketMap bucketmap;
+
+  for (int i = 1; i < argc; ++i) {
+    if (SkipArg(argv[i])) {continue;}
+    Pass1(argv[i], &bucketmap);
+  }
+
+  CalculateFits(&bucketmap);
+
+  TransitiveAlignment(&bucketmap);
+
+  for (int i = 1; i < argc; ++i) {
+    if (SkipArg(argv[i])) {continue;}
+    Pass2(argv[i], &bucketmap);
+  }
+
+  return 0;
+}
+
diff --git a/book-user-code/timecounters.h b/book-user-code/timecounters.h
new file mode 100644
index 000000000000..5f67f9c6c89f
--- /dev/null
+++ b/book-user-code/timecounters.h
@@ -0,0 +1,62 @@
+// timecounters.h
+//
+// Reading cycle counter and gettimeofday counter, on various architectures
+// Also Pause() to slow down speculation in spin loops and give cycles to any hyperthread
+//
+// Copyright 2021 Richard L. Sites
+
+
+#ifndef __TIMERCOUNTERS_H__
+#define __TIMERCOUNTERS_H__
+
+/* Add others as you find and test them */
+// Each Is* macro expands to a literal 0 or 1. Letting "defined" appear through
+// macro expansion inside #if is undefined behavior, so it is evaluated once here.
+#if defined(__x86_64)
+#define Isx86_64 1
+#else
+#define Isx86_64 0
+#endif
+
+#if Isx86_64 && defined(__znver1)
+#define IsAmd_64 1
+#else
+#define IsAmd_64 0
+#endif
+
+#if Isx86_64 && !defined(__znver1)
+#define IsIntel_64 1
+#else
+#define IsIntel_64 0
+#endif
+
+#if defined(__aarch64__)
+#define IsArm_64 1
+#else
+#define IsArm_64 0
+#endif
+
+#if defined(__ARM_ARCH) && (__ARM_ARCH == 8)
+#define IsRPi4 1
+#else
+#define IsRPi4 0
+#endif
+
+#if IsRPi4 && IsArm_64
+#define IsRPi4_64 1
+#else
+#define IsRPi4_64 0
+#endif
+
+// NOTE(review): the header names on the two #include lines below were missing
+// from this copy of the patch and are reconstructed from their trailing
+// comments; <stdint.h> is added for int64_t/uint64_t.
+#include <stdint.h>
+#include <sys/time.h>   // gettimeofday
+
+#if Isx86_64
+#include <x86intrin.h>  // __rdtsc()
+#endif
+
+// Return a constant-rate "cycle" counter
+inline int64_t GetCycles() {
+#if Isx86_64
+  // Increments once per cycle, implemented as increment by N every N (~35) cycles
+  return __rdtsc();
+
+#elif IsRPi4_64
+  // Increments once per 27.778 cycles for RPi4-B at 54 MHz counter and 1.5GHz CPU clock
+  // Call it 28 cycles
+  uint64_t counter_value;
+  asm volatile("mrs %x0, cntvct_el0" : "=r"(counter_value));
+  return counter_value * 28;
+
+#else
+#error Need cycle counter defines for your architecture
+#endif
+
+}
+
+// Return current time of day as microseconds since January 1, 1970
+inline int64_t GetUsec() { + struct timeval tv; + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000000l) + tv.tv_usec; +} + +inline void Pause() { +#if Isx86_64 + __pause(); +#else + // Nothing on Arm, etc. +#endif +} + + +#endif // __TIMERCOUNTERS_H__ diff --git a/book-user-code/unmakeself.cc b/book-user-code/unmakeself.cc new file mode 100644 index 000000000000..8ddab428507b --- /dev/null +++ b/book-user-code/unmakeself.cc @@ -0,0 +1,101 @@ +// Little program to UN-make a self-contained HTML file for displaying dclab graphs. +// Copyright 2021 Richard L. Sites +// +// Inputs +// Self-contained HTML file +// +// Output +// The contained JSON file written to stdout +// If you want, then pipe through sed 's/], /],\n/g' +// + +#include +#include +#include // exit +#include + +static const char* const_text_1 = ""; + +static const char* const_text_3 = "var myString = '"; +static const char* const_text_4 = "';"; + +static const char* const_text_5 = "data = JSON.parse(myString); newdata2_resize(data);"; +static const char* const_text_6 = ""; + + +void usage() { + fprintf(stderr, "Usage: unmakeself \n"); + exit(0); +} + +int main (int argc, const char** argv) { + FILE* finhtml; + if (argc < 2) { + finhtml = stdin; + } else { + finhtml = fopen(argv[1], "rb"); + if (finhtml == NULL) { + fprintf(stderr, "%s did not open.\n", argv[1]); + return 0; + } + } + + char* inhtml_buf = new char[250000000]; // 250MB + + int html_len = fread(inhtml_buf, 1, 250000000, finhtml); + fclose(finhtml); + + + char* self0 = strstr(inhtml_buf, ""); + char* self1 = strstr(inhtml_buf, ""); + char* self2 = strstr(inhtml_buf, ""); + + if (self0 == NULL || self1 == NULL || self2 == NULL) { + fprintf(stderr, "%s does not contain selfcontained* comments\n", argv[1]); + exit(0); + } + + char* self1_end = strchr(self1 + 1, '\n'); + if (self1_end == NULL) {fprintf(stderr, "Missing after selfcontained1\n");} + ++self1_end; // over the + + char* self2_end = strchr(self2 + 1, '\n'); + if (self2_end == 
NULL) {fprintf(stderr, "Missing after selfcontained2\n");} + ++self2_end; // over the + + + //for (int i = 0; i < json_len; ++i) { + // if (injson_buf[i] == '\n') { + // injson_buf[i] = ' '; + // } + //} + + // JSON is in self1_end .. self_2 + // Within this, there is a single-quote string that we want. + *self2 = '\0'; + char* quote1 = strchr(self1_end, '\''); + if (quote1 == NULL) { + fprintf(stderr, "Missing '..' string\n"); + return 0; + } +//fprintf(stderr, "quote1 at offset %d\n", (int)(quote1 - inhtml_buf)); + ++quote1; // Over the quote + + char* quote2 = strchr(quote1, '\''); + if (quote2 == NULL) { + fprintf(stderr, "Missing '..' string\n"); + return 0; + } +//fprintf(stderr, "quote2 at offset %d\n", (int)(quote2 - inhtml_buf)); + + --quote2; // Back over the quote + + // Length of json inhtml piece + int len3 = quote2 - quote1; + fwrite(quote1, 1, len3, stdout); + + free(inhtml_buf); + return 0; +} + diff --git a/book-user-code/whetstone_ku.c b/book-user-code/whetstone_ku.c new file mode 100644 index 000000000000..7b288df6c63a --- /dev/null +++ b/book-user-code/whetstone_ku.c @@ -0,0 +1,498 @@ +/* + * C Converted Whetstone Double Precision Benchmark + * Version 1.2 22 March 1998 + * + * (c) Copyright 1998 Painter Engineering, Inc. + * All Rights Reserved. + * + * Permission is granted to use, duplicate, and + * publish this text and program as long as it + * includes this entire comment block and limited + * rights reference. + * + * Converted by Rich Painter, Painter Engineering, Inc. based on the + * www.netlib.org benchmark/whetstoned version obtained 16 March 1998. + * + * A novel approach was used here to keep the look and feel of the + * FORTRAN version. Altering the FORTRAN-based array indices, + * starting at element 1, to start at element 0 for C, would require + * numerous changes, including decrementing the variable indices by 1. + * Instead, the array E1[] was declared 1 element larger in C. 
This + * allows the FORTRAN index range to function without any literal or + * variable indices changes. The array element E1[0] is simply never + * used and does not alter the benchmark results. + * + * The major FORTRAN comment blocks were retained to minimize + * differences between versions. Modules N5 and N12, like in the + * FORTRAN version, have been eliminated here. + * + * An optional command-line argument has been provided [-c] to + * offer continuous repetition of the entire benchmark. + * An optional argument for setting an alternate LOOP count is also + * provided. Define PRINTOUT to cause the POUT() function to print + * outputs at various stages. Final timing measurements should be + * made with the PRINTOUT undefined. + * + * Questions and comments may be directed to the author at + * r.painter@ieee.org + */ + +/* + * dsites 2020.06.02 compile with g++ -O2 whetstone_ku.c kutrace_lib.cc -lm -o whetstone_ku + * KUtrace labels for sections added + * make all loop results live (else modules 6 7 8 empty) + */ + +/* +C********************************************************************** +C Benchmark #2 -- Double Precision Whetstone (A001) +C +C o This is a REAL*8 version of +C the Whetstone benchmark program. +C +C o DO-loop semantics are ANSI-66 compatible. +C +C o Final measurements are to be made with all +C WRITE statements and FORMAT sttements removed. +C +C********************************************************************** +*/ + +/* standard C library headers required */ +#include +#include +#include +#include +#include // gettimeofday + + +/* the following is optional depending on the timing function used */ +#include + +/* dsites 2020.06.02 */ +#include "kutrace_lib.h" + +/* map the FORTRAN math functions, etc. 
to the C versions */ +#define DSIN sin +#define DCOS cos +#define DATAN atan +#define DLOG log +#define DEXP exp +#define DSQRT sqrt +#define IF if + +/* function prototypes */ +void POUT(long N, long J, long K, double X1, double X2, double X3, double X4); +void PA(double E[]); +void P0(void); +void P3(double X, double Y, double *Z); +#define USAGE "usage: whetdc [-c] [loops]\n" + +/* + COMMON T,T1,T2,E1(4),J,K,L +*/ +double T,T1,T2,E1[5]; +double VT2; /* dsites added volatile to make module 8 live */ +int J,K,L; + +inline uint64_t GetUsec() { + struct timeval tv; + gettimeofday(&tv, NULL); + return (tv.tv_sec * 1000000lu) + tv.tv_usec; +} + +uint64_t startusec, elapsedusec; + +int +main(int argc, char *argv[]) +{ + bool makelive = true; /* dsites */ + + /* used in the FORTRAN version */ + long I; + long N1, N2, N3, N4, N6, N7, N8, N9, N10, N11; + double X1,X2,X3,X4,X,Y,Z; + long LOOP; + int II, JJ; + + /* added for this version */ + long loopstart; + long startsec, finisec; + float KIPS; + int continuous; + + loopstart = 1000; /* see the note about LOOP below */ + continuous = 0; + + II = 1; /* start at the first arg (temp use of II here) */ + while (II < argc) { + if (strncmp(argv[II], "-c", 2) == 0 || argv[II][0] == 'c') { + continuous = 1; + } else if (atol(argv[II]) > 0) { + loopstart = atol(argv[II]); + } else { + fprintf(stderr, USAGE); + return(1); + } + II++; + } + +LCONT: +/* +C +C Start benchmark timing at this point. +C +*/ + startusec = GetUsec(); + startsec = time(0); + makelive = (startsec == 0); /* dsites compiler doesn't know this is always false */ + +/* +C +C The actual benchmark starts here. +C +*/ + T = .499975; + T1 = 0.50025; + T2 = 2.0; + VT2 = 2.0; +/* +C +C With loopcount LOOP=10, one million Whetstone instructions +C will be executed in EACH MAJOR LOOP..A MAJOR LOOP IS EXECUTED +C 'II' TIMES TO INCREASE WALL-CLOCK TIMING ACCURACY. 
+C + LOOP = 1000; +*/ + LOOP = loopstart; + II = 1; + + JJ = 1; + +IILOOP: + N1 = 0; + N2 = 12 * LOOP; + N3 = 14 * LOOP; + N4 = 345 * LOOP; + N6 = 210 * LOOP; + N7 = 32 * LOOP; + N8 = 899 * LOOP; + N9 = 616 * LOOP; + N10 = 0; + N11 = 93 * LOOP; +/* +C +C Module 1: Simple identifiers +C +*/ + // kutrace::mark_a("mod 1"); // omitted + + X1 = 1.0; + X2 = -1.0; + X3 = -1.0; + X4 = -1.0; + + for (I = 1; I <= N1; I++) { + X1 = (X1 + X2 + X3 - X4) * T; + X2 = (X1 + X2 - X3 + X4) * T; + X3 = (X1 - X2 + X3 + X4) * T; + X4 = (-X1+ X2 + X3 + X4) * T; + } + if (makelive) POUT(N1,N1,N1,X1,X2,X3,X4); /* dsites */ + +#ifdef PRINTOUT + IF (JJ==II)POUT(N1,N1,N1,X1,X2,X3,X4); +#endif + +/* +C +C Module 2: Array elements +C +*/ + kutrace::mark_a("mod 2"); + + E1[1] = 1.0; + E1[2] = -1.0; + E1[3] = -1.0; + E1[4] = -1.0; + + for (I = 1; I <= N2; I++) { + E1[1] = ( E1[1] + E1[2] + E1[3] - E1[4]) * T; + E1[2] = ( E1[1] + E1[2] - E1[3] + E1[4]) * T; + E1[3] = ( E1[1] - E1[2] + E1[3] + E1[4]) * T; + E1[4] = (-E1[1] + E1[2] + E1[3] + E1[4]) * T; + } + if (makelive) POUT(N2,N3,N2,E1[1],E1[2],E1[3],E1[4]); /* dsites */ + +#ifdef PRINTOUT + IF (JJ==II)POUT(N2,N3,N2,E1[1],E1[2],E1[3],E1[4]); +#endif + +/* +C +C Module 3: Array as parameter +C +*/ + kutrace::mark_a("mod 3"); + + for (I = 1; I <= N3; I++) + PA(E1); + if (makelive) POUT(N3,N2,N2,E1[1],E1[2],E1[3],E1[4]); /* dsites */ + +#ifdef PRINTOUT + IF (JJ==II)POUT(N3,N2,N2,E1[1],E1[2],E1[3],E1[4]); +#endif + +/* +C +C Module 4: Conditional jumps +C +*/ + kutrace::mark_a("mod 4"); + + J = 1; + for (I = 1; I <= N4; I++) { + if (J == 1) + J = 2; + else + J = 3; + + if (J > 2) + J = 0; + else + J = 1; + + if (J < 1) + J = 1; + else + J = 0; + } + if (makelive) POUT(N4,J,J,X1,X2,X3,X4); /* dsites */ + +#ifdef PRINTOUT + IF (JJ==II)POUT(N4,J,J,X1,X2,X3,X4); +#endif + +/* +C +C Module 5: Omitted +C Module 6: Integer arithmetic +C +*/ + kutrace::mark_a("mod 6"); + + + J = 1; + K = 2; + L = 3; + + for (I = 1; I <= N6; I++) { + J = J * (K-J) * (L-K); 
+ K = L * K - (L-J) * K; + L = (L-K) * (K+J); + E1[L-1] = J + K + L; + E1[K-1] = J * K * L; + } + if (makelive) POUT(N6,J,K,E1[1],E1[2],E1[3],E1[4]); /* dsites */ + +#ifdef PRINTOUT + IF (JJ==II)POUT(N6,J,K,E1[1],E1[2],E1[3],E1[4]); +#endif + +/* +C +C Module 7: Trigonometric functions +C +*/ + kutrace::mark_a("mod 7"); + + X = 0.5; + Y = 0.5; + + for (I = 1; I <= N7; I++) { + X = T * DATAN(T2*DSIN(X)*DCOS(X)/(DCOS(X+Y)+DCOS(X-Y)-1.0)); + Y = T * DATAN(T2*DSIN(Y)*DCOS(Y)/(DCOS(X+Y)+DCOS(X-Y)-1.0)); + } + if (makelive) POUT(N7,J,K,X,X,Y,Y); /* dsites */ + +#ifdef PRINTOUT + IF (JJ==II)POUT(N7,J,K,X,X,Y,Y); +#endif + +/* +C +C Module 8: Procedure calls +C +*/ + kutrace::mark_a("mod 8"); + + X = 1.0; + Y = 1.0; + Z = 1.0; + + for (I = 1; I <= N8; I++) + P3(X,Y,&Z); + if (makelive) POUT(N8,J,K,X,Y,Z,Z); /* dsites */ + +#ifdef PRINTOUT + IF (JJ==II)POUT(N8,J,K,X,Y,Z,Z); +#endif + +/* +C +C Module 9: Array references +C +*/ + kutrace::mark_a("mod 9"); + + J = 1; + K = 2; + L = 3; + E1[1] = 1.0; + E1[2] = 2.0; + E1[3] = 3.0; + + for (I = 1; I <= N9; I++) + P0(); + if (makelive) POUT(N9,J,K,E1[1],E1[2],E1[3],E1[4]); /* dsites */ + +#ifdef PRINTOUT + IF (JJ==II)POUT(N9,J,K,E1[1],E1[2],E1[3],E1[4]); +#endif + +/* +C +C Module 10: Integer arithmetic +C +*/ + // kutrace::mark_a("mod 10"); // omitted + + J = 2; + K = 3; + + for (I = 1; I <= N10; I++) { + J = J + K; + K = J + K; + J = K - J; + K = K - J - J; + } + if (makelive) POUT(N10,J,K,X1,X2,X3,X4); /* dsites */ + +#ifdef PRINTOUT + IF (JJ==II)POUT(N10,J,K,X1,X2,X3,X4); +#endif + +/* +C +C Module 11: Standard functions +C +*/ + kutrace::mark_a("mod 11"); + + X = 0.75; + + for (I = 1; I <= N11; I++) + X = DSQRT(DEXP(DLOG(X)/T1)); + if (makelive) POUT(N11,J,K,X,X,X,X); /* dsites */ + +#ifdef PRINTOUT + IF (JJ==II)POUT(N11,J,K,X,X,X,X); +#endif + +/* +C +C THIS IS THE END OF THE MAJOR LOOP. +C +*/ + if (++JJ <= II) + goto IILOOP; + +/* +C +C Stop benchmark timing at this point. 
+C +*/ + elapsedusec = GetUsec() - startusec; + finisec = time(0); + +/* +C---------------------------------------------------------------- +C Performance in Whetstone KIP's per second is given by +C +C (100*LOOP*II)/TIME +C +C where TIME is in seconds. +C-------------------------------------------------------------------- +*/ + printf("\n"); + if (finisec-startsec <= 0) { + printf("Insufficient duration- Increase the LOOP count\n"); + return(1); + } + +/* + printf("Loops: %ld, Iterations: %d, Duration: %ld sec.\n", + LOOP, II, finisec-startsec); + + KIPS = (100.0*LOOP*II)/(float)(finisec-startsec); + if (KIPS >= 1000.0) + printf("C Converted Double Precision Whetstones: %.1f MIPS\n", KIPS/1000.0); + else + printf("C Converted Double Precision Whetstones: %.1f KIPS\n", KIPS); +*/ + + printf("Loops: %ld, Iterations: %d, Duration: %.3f sec.\n", + LOOP, II, elapsedusec / 1000000.0); + printf("C Converted Double Precision Whetstones: %.0f MIPS\n", (100000.0*LOOP*II) / elapsedusec); + + if (continuous) + goto LCONT; + + return(0); +} + +void +PA(double E[]) +{ + J = 0; + +L10: + E[1] = ( E[1] + E[2] + E[3] - E[4]) * T; + E[2] = ( E[1] + E[2] - E[3] + E[4]) * T; + E[3] = ( E[1] - E[2] + E[3] + E[4]) * T; + E[4] = (-E[1] + E[2] + E[3] + E[4]) / T2; + J += 1; + + if (J < 6) + goto L10; +} + +void +P0(void) +{ + E1[J] = E1[K]; + E1[K] = E1[L]; + E1[L] = E1[J]; +} + +void __attribute__ ((noinline)) /* dsites */ +P3(double X, double Y, double *Z) +{ + double X1, Y1; + + X1 = X; + Y1 = Y; + X1 = T * (X1 + Y1); + Y1 = T * (X1 + Y1); + *Z = (X1 + Y1) / VT2; +} + +// #ifdef PRINTOUT +#if 1 +void +POUT(long N, long J, long K, double X1, double X2, double X3, double X4) +{ + printf("%7ld %7ld %7ld %12.4e %12.4e %12.4e %12.4e\n", + N, J, K, X1, X2, X3, X4); +} +#endif diff --git a/bookcode/book-user-code.zip b/bookcode/book-user-code.zip deleted file mode 100644 index d3033fbdadaf..000000000000 Binary files a/bookcode/book-user-code.zip and /dev/null differ