Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Profiling] Aggregate flamegraph by process name and thread name #119115

Open
wants to merge 26 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
4d35b22
Add field process.executable.name to profiling-events
rockdaboot Dec 17, 2024
a4aea04
Amend query to aggregate events by executable name
rockdaboot Dec 17, 2024
adae211
Send flamegraph row with grouping by executable name
rockdaboot Dec 18, 2024
29fa515
Flamegraph sub-aggregation by thread name
rockdaboot Dec 18, 2024
ea63b21
Rework internal data model
rockdaboot Dec 19, 2024
823579c
Cleanups
rockdaboot Dec 19, 2024
72254fb
Fix building tests
rockdaboot Dec 19, 2024
1d2b984
Fix GetStackTracesResponseTests
rockdaboot Jan 2, 2025
535d30d
Fix unit tests
rockdaboot Jan 3, 2025
b651317
Fix remaining unit tests
rockdaboot Jan 3, 2025
398fc60
[CI] Auto commit changes from spotless
elasticsearchmachine Jan 3, 2025
0a33c7e
Fix flamegraph yaml tests
rockdaboot Jan 3, 2025
8046c83
Fix yaml REST tests
rockdaboot Jan 4, 2025
9cf66c8
Increase INDEX_TEMPLATE_VERSION for profiling.executable.name
rockdaboot Jan 6, 2025
cf4d652
Fix yamlRestCompatTest
rockdaboot Jan 6, 2025
e514874
Rename executable name to process name
rockdaboot Jan 8, 2025
28c990c
Merge branch 'main' into flamegraph-executable-name
rockdaboot Jan 8, 2025
4fdd4d8
Merge branch 'main' into flamegraph-executable-name
rockdaboot Jan 9, 2025
f3ca80c
Merge branch 'main' into flamegraph-executable-name
rockdaboot Jan 10, 2025
fbb669b
Remove warnings meant for testing
rockdaboot Jan 10, 2025
659a4fd
Replace ChunkedToXContentHelper.wrapWithObject() with .object()
rockdaboot Jan 13, 2025
f9ef6be
Fix comment in ProfilingIndexTemplateRegistry.java
rockdaboot Jan 13, 2025
62117b4
Simplify sorting of unique stacktrace and host IDs
rockdaboot Jan 13, 2025
57d3ad4
Merge branch 'main' into flamegraph-executable-name
rockdaboot Jan 13, 2025
c118150
[CI] Auto commit changes from spotless
elasticsearchmachine Jan 13, 2025
7cd497c
Merge branch 'main' into flamegraph-executable-name
rockdaboot Jan 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions x-pack/plugin/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -96,5 +96,7 @@ tasks.named("yamlRestCompatTestTransform").configure({ task ->
task.skipTest("esql/180_match_operator/match with non text field", "Match operator can now be used on non-text fields")
task.skipTest("esql/180_match_operator/match with functions", "Error message changed")
task.skipTest("esql/40_unsupported_types/semantic_text declared in mapping", "The semantic text field format changed")
task.replaceValueInMatch("Size", 49, "Test flamegraph from profiling-events")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is this instruction needed here?

Copy link
Contributor Author

@rockdaboot rockdaboot Jan 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is because comment

task.replaceValueInMatch("Size", 49, "Test flamegraph from test-events")
})

Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"orchestrator.resource.name",
"host.name",
"container.name",
"process.executable.name",
christos68k marked this conversation as resolved.
Show resolved Hide resolved
"process.thread.name"
]
},
Expand Down Expand Up @@ -59,6 +60,9 @@
"process.thread.name": {
"type": "keyword"
},
"process.executable.name": {
danielmitterdorfer marked this conversation as resolved.
Show resolved Hide resolved
"type": "keyword"
},
"Stacktrace.count": {
"type": "short",
"index": false
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ public void testGetStackTracesUnfiltered() throws Exception {
);
GetFlamegraphResponse response = client().execute(GetFlamegraphAction.INSTANCE, request).get();
// only spot-check top level properties - detailed tests are done in unit tests
assertEquals(994, response.getSize());
assertEquals(1010, response.getSize());
assertEquals(1.0d, response.getSamplingRate(), 0.001d);
assertEquals(46, response.getSelfCPU());
assertEquals(1903, response.getTotalCPU());
assertEquals(1995, response.getTotalCPU());
assertEquals(46, response.getTotalSamples());

// The root node's values are the same as the top-level values.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import org.elasticsearch.index.query.TermQueryBuilder;

import java.util.List;
import java.util.Map;

public class GetStackTracesActionIT extends ProfilingTestCase {
public void testGetStackTracesUnfiltered() throws Exception {
Expand All @@ -36,7 +37,11 @@ public void testGetStackTracesUnfiltered() throws Exception {
assertEquals(1821, response.getTotalFrames());

assertNotNull(response.getStackTraceEvents());
assertEquals(3L, response.getStackTraceEvents().get("L7kj7UvlKbT-vN73el4faQ").count);

Map<TraceEventID, TraceEvent> traceEvents = response.getStackTraceEvents();

TraceEventID traceEventID = new TraceEventID("", "497295213074376", "8457605156473051743", "L7kj7UvlKbT-vN73el4faQ");
assertEquals(3L, response.getStackTraceEvents().get(traceEventID).count);

assertNotNull(response.getStackTraces());
// just do a high-level spot check. Decoding is tested in unit-tests
Expand All @@ -45,8 +50,6 @@ public void testGetStackTracesUnfiltered() throws Exception {
assertEquals(18, stackTrace.fileIds.length);
assertEquals(18, stackTrace.frameIds.length);
assertEquals(18, stackTrace.typeIds.length);
assertEquals(0.0000051026469d, stackTrace.annualCO2Tons, 0.0000000001d);
assertEquals(0.19825d, stackTrace.annualCostsUSD, 0.00001d);
// not determined by default
assertNull(stackTrace.subGroups);

Expand Down Expand Up @@ -80,7 +83,10 @@ public void testGetStackTracesGroupedByServiceName() throws Exception {
assertEquals(1821, response.getTotalFrames());

assertNotNull(response.getStackTraceEvents());
assertEquals(3L, response.getStackTraceEvents().get("L7kj7UvlKbT-vN73el4faQ").count);

TraceEventID traceEventID = new TraceEventID("", "497295213074376", "8457605156473051743", "L7kj7UvlKbT-vN73el4faQ");
assertEquals(3L, response.getStackTraceEvents().get(traceEventID).count);
assertEquals(Long.valueOf(2L), response.getStackTraceEvents().get(traceEventID).subGroups.getCount("basket"));

assertNotNull(response.getStackTraces());
// just do a high-level spot check. Decoding is tested in unit-tests
Expand All @@ -89,9 +95,6 @@ public void testGetStackTracesGroupedByServiceName() throws Exception {
assertEquals(18, stackTrace.fileIds.length);
assertEquals(18, stackTrace.frameIds.length);
assertEquals(18, stackTrace.typeIds.length);
assertEquals(0.0000051026469d, stackTrace.annualCO2Tons, 0.0000000001d);
assertEquals(0.19825d, stackTrace.annualCostsUSD, 0.00001d);
assertEquals(Long.valueOf(2L), stackTrace.subGroups.getCount("basket"));

assertNotNull(response.getStackFrames());
StackFrame stackFrame = response.getStackFrames().get("8NlMClggx8jaziUTJXlmWAAAAAAAAIYI");
Expand Down Expand Up @@ -127,8 +130,13 @@ public void testGetStackTracesFromAPMWithMatchNoDownsampling() throws Exception
assertEquals(1.0d, response.getSamplingRate(), 0.001d);

assertNotNull(response.getStackTraceEvents());
assertEquals(3L, response.getStackTraceEvents().get("Ce77w10WeIDow3kd1jowlA").count);
assertEquals(2L, response.getStackTraceEvents().get("JvISdnJ47BQ01489cwF9DA").count);

TraceEventID traceEventID = new TraceEventID("", "", "", "Ce77w10WeIDow3kd1jowlA");
assertEquals(3L, response.getStackTraceEvents().get(traceEventID).count);
assertEquals(Long.valueOf(3L), response.getStackTraceEvents().get(traceEventID).subGroups.getCount("encodeSha1"));

traceEventID = new TraceEventID("", "", "", "JvISdnJ47BQ01489cwF9DA");
assertEquals(2L, response.getStackTraceEvents().get(traceEventID).count);

assertNotNull(response.getStackTraces());
// just do a high-level spot check. Decoding is tested in unit-tests
Expand All @@ -137,9 +145,6 @@ public void testGetStackTracesFromAPMWithMatchNoDownsampling() throws Exception
assertEquals(39, stackTrace.fileIds.length);
assertEquals(39, stackTrace.frameIds.length);
assertEquals(39, stackTrace.typeIds.length);
assertTrue(stackTrace.annualCO2Tons > 0.0d);
assertTrue(stackTrace.annualCostsUSD > 0.0d);
assertEquals(Long.valueOf(3L), stackTrace.subGroups.getCount("encodeSha1"));

assertNotNull(response.getStackFrames());
StackFrame stackFrame = response.getStackFrames().get("fhsEKXDuxJ-jIJrZpdRuSAAAAAAAAFtj");
Expand Down Expand Up @@ -175,9 +180,13 @@ public void testGetStackTracesFromAPMWithMatchAndDownsampling() throws Exception
assertEquals(0.2d, response.getSamplingRate(), 0.001d);

assertNotNull(response.getStackTraceEvents());

// as the sampling rate is 0.2, we see 5 times more samples (random sampler agg automatically adjusts sample count)
assertEquals(5 * 3L, response.getStackTraceEvents().get("Ce77w10WeIDow3kd1jowlA").count);
assertEquals(5 * 2L, response.getStackTraceEvents().get("JvISdnJ47BQ01489cwF9DA").count);
TraceEventID traceEventID = new TraceEventID("", "", "", "Ce77w10WeIDow3kd1jowlA");
assertEquals(5 * 3L, response.getStackTraceEvents().get(traceEventID).count);

traceEventID = new TraceEventID("", "", "", "JvISdnJ47BQ01489cwF9DA");
assertEquals(5 * 2L, response.getStackTraceEvents().get(traceEventID).count);

assertNotNull(response.getStackTraces());
// just do a high-level spot check. Decoding is tested in unit-tests
Expand All @@ -186,8 +195,6 @@ public void testGetStackTracesFromAPMWithMatchAndDownsampling() throws Exception
assertEquals(39, stackTrace.fileIds.length);
assertEquals(39, stackTrace.frameIds.length);
assertEquals(39, stackTrace.typeIds.length);
assertTrue(stackTrace.annualCO2Tons > 0.0d);
assertTrue(stackTrace.annualCostsUSD > 0.0d);
// not determined by default
assertNull(stackTrace.subGroups);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ public class GetStackTracesResponse extends ActionResponse implements ChunkedToX
private final Map<String, String> executables;
@UpdateForV9(owner = UpdateForV9.Owner.PROFILING) // remove this field - it is unused in Kibana
@Nullable
private final Map<String, TraceEvent> stackTraceEvents;
private final Map<TraceEventID, TraceEvent> stackTraceEvents;
@UpdateForV9(owner = UpdateForV9.Owner.PROFILING) // remove this field - it is unused in Kibana
private final int totalFrames;
private final double samplingRate;
Expand All @@ -42,7 +42,7 @@ public GetStackTracesResponse(
Map<String, StackTrace> stackTraces,
Map<String, StackFrame> stackFrames,
Map<String, String> executables,
Map<String, TraceEvent> stackTraceEvents,
Map<TraceEventID, TraceEvent> stackTraceEvents,
int totalFrames,
double samplingRate,
long totalSamples
Expand Down Expand Up @@ -73,7 +73,7 @@ public Map<String, String> getExecutables() {
return executables;
}

public Map<String, TraceEvent> getStackTraceEvents() {
public Map<TraceEventID, TraceEvent> getStackTraceEvents() {
return stackTraceEvents;
}

Expand All @@ -100,23 +100,24 @@ public Iterator<? extends ToXContent> toXContentChunked(ToXContent.Params params
optional(
"stack_trace_events",
stackTraceEvents,
(n, v) -> ChunkedToXContentHelper.map(n, v, entry -> (b, p) -> b.field(entry.getKey(), entry.getValue().count))
(n, v) -> ChunkedToXContentHelper.wrapWithObject(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't this change the structure of the output (backwards-compatibility)?

Copy link
Contributor Author

@rockdaboot rockdaboot Jan 13, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you see any issues with this in Kibana? I haven't found any so far, and the BWC tests seem to run fine as well.

n,
Iterators.map(v.entrySet().iterator(), e -> (b, p) -> b.field(e.getKey().stacktraceID(), e.getValue().count))
)
),
Iterators.single((b, p) -> b.field("total_frames", totalFrames)),
Iterators.single((b, p) -> b.field("sampling_rate", samplingRate)),
Iterators.single((b, p) -> b.field("total_frames", totalFrames).field("sampling_rate", samplingRate).endObject())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like an unrelated cleanup / optimization?

Copy link
Contributor Author

@rockdaboot rockdaboot Jan 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it is an unrelated change. It has been suggested by Armin Braun in a chat. See comment

// the following fields are intentionally not written to the XContent representation (only needed on the transport layer):
//
// * start
// * end
// * totalSamples
ChunkedToXContentHelper.endObject()
);
}

private static <T> Iterator<? extends ToXContent> optional(
private static <K, T> Iterator<? extends ToXContent> optional(
String name,
Map<String, T> values,
BiFunction<String, Map<String, T>, Iterator<? extends ToXContent>> supplier
Map<K, T> values,
BiFunction<String, Map<K, T>, Iterator<? extends ToXContent>> supplier
) {
return (values != null) ? supplier.apply(name, values) : Collections.emptyIterator();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
package org.elasticsearch.xpack.profiling.action;

import java.time.Instant;
import java.util.List;
import java.util.Map;

class GetStackTracesResponseBuilder {
Expand All @@ -18,8 +17,7 @@ class GetStackTracesResponseBuilder {
private int totalFrames;
private Map<String, StackFrame> stackFrames;
private Map<String, String> executables;
private Map<String, TraceEvent> stackTraceEvents;
private List<TransportGetStackTracesAction.HostEventCount> hostEventCounts;
private Map<TraceEventID, TraceEvent> stackTraceEvents;
private double samplingRate;
private long totalSamples;
private Double requestedDuration;
Expand Down Expand Up @@ -67,19 +65,11 @@ public void setExecutables(Map<String, String> executables) {
this.executables = executables;
}

public void setStackTraceEvents(Map<String, TraceEvent> stackTraceEvents) {
public void setStackTraceEvents(Map<TraceEventID, TraceEvent> stackTraceEvents) {
this.stackTraceEvents = stackTraceEvents;
}

public void setHostEventCounts(List<TransportGetStackTracesAction.HostEventCount> hostEventCounts) {
this.hostEventCounts = hostEventCounts;
}

public List<TransportGetStackTracesAction.HostEventCount> getHostEventCounts() {
return hostEventCounts;
}

public Map<String, TraceEvent> getStackTraceEvents() {
public Map<TraceEventID, TraceEvent> getStackTraceEvents() {
return stackTraceEvents;
}

Expand Down Expand Up @@ -149,17 +139,17 @@ public void setTotalSamples(long totalSamples) {
public GetStackTracesResponse build() {
// Merge the TraceEvent data into the StackTraces.
if (stackTraces != null) {
for (Map.Entry<String, StackTrace> entry : stackTraces.entrySet()) {
String stacktraceID = entry.getKey();
TraceEvent event = stackTraceEvents.get(stacktraceID);
if (event != null) {
StackTrace stackTrace = entry.getValue();
stackTrace.count = event.count;
for (Map.Entry<TraceEventID, TraceEvent> entry : stackTraceEvents.entrySet()) {
TraceEventID traceEventID = entry.getKey();
StackTrace stackTrace = stackTraces.get(traceEventID.stacktraceID());
if (stackTrace != null) {
TraceEvent event = entry.getValue();
if (event.subGroups != null) {
stackTrace.subGroups = event.subGroups;
}
stackTrace.annualCO2Tons = event.annualCO2Tons;
stackTrace.annualCostsUSD = event.annualCostsUSD;
stackTrace.count += event.count;
stackTrace.annualCO2Tons += event.annualCO2Tons;
stackTrace.annualCostsUSD += event.annualCostsUSD;
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,22 +31,14 @@ final class StackTrace implements ToXContentObject {
double annualCostsUSD;
long count;

StackTrace(
int[] addressOrLines,
String[] fileIds,
String[] frameIds,
int[] typeIds,
double annualCO2Tons,
double annualCostsUSD,
long count
) {
StackTrace(int[] addressOrLines, String[] fileIds, String[] frameIds, int[] typeIds) {
this.addressOrLines = addressOrLines;
this.fileIds = fileIds;
this.frameIds = frameIds;
this.typeIds = typeIds;
this.annualCO2Tons = annualCO2Tons;
this.annualCostsUSD = annualCostsUSD;
this.count = count;
annualCO2Tons = 0.0d;
annualCostsUSD = 0.0d;
count = 0;
}

private static final int BASE64_FRAME_ID_LENGTH = 32;
Expand Down Expand Up @@ -210,7 +202,7 @@ public static StackTrace fromSource(Map<String, Object> source) {
// Step 2: Convert the run-length byte encoding into a list of uint8s.
int[] typeIDs = runLengthDecodeBase64Url(inputFrameTypes, inputFrameTypes.length(), countsFrameIDs);

return new StackTrace(addressOrLines, fileIDs, frameIDs, typeIDs, 0, 0, 0);
return new StackTrace(addressOrLines, fileIDs, frameIDs, typeIDs);
}

public void forNativeAndKernelFrames(Consumer<String> consumer) {
Expand All @@ -224,15 +216,15 @@ public void forNativeAndKernelFrames(Consumer<String> consumer) {

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field("address_or_lines", this.addressOrLines);
builder.field("file_ids", this.fileIds);
builder.field("frame_ids", this.frameIds);
builder.field("type_ids", this.typeIds);
builder.field("annual_co2_tons", this.annualCO2Tons);
builder.field("annual_costs_usd", this.annualCostsUSD);
builder.field("count", this.count);
builder.endObject();
builder.startObject()
.field("address_or_lines", this.addressOrLines)
.field("file_ids", this.fileIds)
.field("frame_ids", this.frameIds)
.field("type_ids", this.typeIds)
.field("annual_co2_tons", this.annualCO2Tons)
.field("annual_costs_usd", this.annualCostsUSD)
.field("count", this.count)
.endObject();
return builder;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,53 +7,29 @@

package org.elasticsearch.xpack.profiling.action;

import java.util.Objects;

final class TraceEvent {
final String stacktraceID;
long count;
double annualCO2Tons;
double annualCostsUSD;
long count;
SubGroup subGroups;

TraceEvent(String stacktraceID) {
this(stacktraceID, 0);
TraceEvent() {
this(0);
}

TraceEvent(String stacktraceID, long count) {
this.stacktraceID = stacktraceID;
TraceEvent(long count) {
this.count = count;
}

@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
TraceEvent event = (TraceEvent) o;
return count == event.count && Objects.equals(stacktraceID, event.stacktraceID);
}

@Override
public int hashCode() {
return Objects.hash(stacktraceID, count);
}

@Override
public String toString() {
return "TraceEvent{"
+ "stacktraceID='"
+ stacktraceID
+ '\''
+ "count="
+ count
+ ", annualCO2Tons="
+ annualCO2Tons
+ ", annualCostsUSD="
+ annualCostsUSD
+ ", count="
+ count
+ ", subGroups="
+ subGroups
+ '}';
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/

package org.elasticsearch.xpack.profiling.action;

/**
 * Identifies a trace event by the combination of process name, thread name,
 * host ID and stack trace ID.
 *
 * <p>As a record, equality and hashing are derived from all four components,
 * so instances can be used directly as map keys.
 *
 * @param processName  name of the process the event belongs to
 * @param threadName   name of the thread the event belongs to
 * @param hostID       ID of the host the event belongs to
 * @param stacktraceID ID of the stack trace the event refers to
 */
record TraceEventID(
    String processName,
    String threadName,
    String hostID,
    String stacktraceID
) {}
Loading