Skip to content

Commit b8b8e24

Browse files
chriddyp and Claude committed
Add distribution attribute to box plots to improve log-axis support
Implements a new `distribution` attribute for box plots with three options:

- normal: Standard 1.5 * IQR rule for whiskers
- log-normal: Calculates whiskers based on IQR in log units
- auto (default): Uses log-normal on log axes, normal otherwise

This prevents negative whiskers when using log scales, which would otherwise appear as infinitely long whiskers due to log axis behavior.

Fixes #7388

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent b1c9ce5 commit b8b8e24

File tree

5 files changed

+312
-2
lines changed

5 files changed

+312
-2
lines changed

src/traces/box/attributes.js

+19
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,25 @@ module.exports = {
312312
'Q3 the median of the upper half.'
313313
].join(' ')
314314
},
315+
316+
distribution: {
317+
valType: 'enumerated',
318+
values: ['normal', 'log-normal', 'auto'],
319+
dflt: 'auto',
320+
editType: 'calc',
321+
description: [
322+
'Sets the underlying distribution used to compute the whiskers.',
323+
324+
'If *normal*, the whiskers are computed using the standard 1.5 * IQR rule,',
325+
'when displaying your data on a linear scale.',
326+
327+
'If *log-normal*, the whiskers are computed based on the IQR in log units,',
328+
'which prevents the lower fence from ever going negative (resulting in an',
329+
'infinitely long whisker on a log scale).',
330+
331+
'If *auto*, uses *log-normal* when displayed on a log axis, otherwise *normal*.'
332+
].join(' ')
333+
},
315334

316335
width: {
317336
valType: 'number',

src/traces/box/calc.js

+36-2
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ module.exports = function calc(gd, trace) {
3636
posLetter = 'x';
3737
hasPeriod = !!trace.xperiodalignment;
3838
}
39+
40+
// Determine whether to use log-normal distribution for whiskers
41+
var useLogNormal = trace.distribution === 'log-normal' ||
42+
(trace.distribution === 'auto' && valAxis.type === 'log');
3943

4044
var allPosArrays = getPosArrays(trace, posLetter, posAxis, fullLayout[numKey]);
4145
var posArray = allPosArrays[0];
@@ -78,6 +82,7 @@ module.exports = function calc(gd, trace) {
7882
if(hasPeriod && origPos) {
7983
cdi.orig_p = origPos[i]; // used by hover
8084
}
85+
cdi.usesLogNormal = useLogNormal;
8186

8287
cdi.q1 = d2c('q1');
8388
cdi.med = d2c('median');
@@ -213,6 +218,7 @@ module.exports = function calc(gd, trace) {
213218
if(ptsPerBin[i].length > 0) {
214219
cdi = {};
215220
cdi.pos = cdi[posLetter] = posDistinct[i];
221+
cdi.usesLogNormal = useLogNormal;
216222

217223
pts = cdi.pts = ptsPerBin[i].sort(sortByVal);
218224
boxVals = cdi[valLetter] = pts.map(extractVal);
@@ -407,10 +413,24 @@ function extractVal(o) { return o.v; }
407413
// lowest point within 1.5 * IQR below q1, capped at q1 (IQR is taken in log units when cdi.usesLogNormal)
408414
function computeLowerFence(cdi, boxVals, N) {
409415
if(N === 0) return cdi.q1;
416+
417+
var lowerFence;
418+
419+
if (cdi.usesLogNormal) {
420+
// For log-normal distribution, compute fence in log space to prevent negative values
421+
var logQ1 = Math.log(Math.max(cdi.q1, Number.MIN_VALUE));
422+
var logQ3 = Math.log(Math.max(cdi.q3, Number.MIN_VALUE));
423+
var logIQR = logQ3 - logQ1;
424+
lowerFence = Math.exp(logQ1 - 1.5 * logIQR);
425+
} else {
426+
// Standard 1.5 * IQR calculation (2.5*Q1 - 1.5*Q3 is algebraically equivalent)
427+
lowerFence = 2.5 * cdi.q1 - 1.5 * cdi.q3;
428+
}
429+
410430
return Math.min(
411431
cdi.q1,
412432
boxVals[Math.min(
413-
Lib.findBin(2.5 * cdi.q1 - 1.5 * cdi.q3, boxVals, true) + 1,
433+
Lib.findBin(lowerFence, boxVals, true) + 1,
414434
N - 1
415435
)]
416436
);
@@ -419,10 +439,24 @@ function computeLowerFence(cdi, boxVals, N) {
419439
// highest point within 1.5 * IQR above q3, floored at q3 (IQR is taken in log units when cdi.usesLogNormal)
420440
function computeUpperFence(cdi, boxVals, N) {
421441
if(N === 0) return cdi.q3;
442+
443+
var upperFence;
444+
445+
if (cdi.usesLogNormal) {
446+
// For log-normal distribution, compute fence in log space
447+
var logQ1 = Math.log(Math.max(cdi.q1, Number.MIN_VALUE));
448+
var logQ3 = Math.log(Math.max(cdi.q3, Number.MIN_VALUE));
449+
var logIQR = logQ3 - logQ1;
450+
upperFence = Math.exp(logQ3 + 1.5 * logIQR);
451+
} else {
452+
// Standard 1.5 * IQR calculation (2.5*Q3 - 1.5*Q1 is algebraically equivalent)
453+
upperFence = 2.5 * cdi.q3 - 1.5 * cdi.q1;
454+
}
455+
422456
return Math.max(
423457
cdi.q3,
424458
boxVals[Math.max(
425-
Lib.findBin(2.5 * cdi.q3 - 1.5 * cdi.q1, boxVals),
459+
Lib.findBin(upperFence, boxVals),
426460
0
427461
)]
428462
);
+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
{
2+
"data": [
3+
{
4+
"type": "box",
5+
"name": "Normal Dist (Linear)",
6+
"x": [1],
7+
"y": [1, 2, 3, 4, 5, 10, 20, 100],
8+
"distribution": "normal",
9+
"boxmean": true
10+
},
11+
{
12+
"type": "box",
13+
"name": "Log-Normal Dist (Linear)",
14+
"x": [2],
15+
"y": [1, 2, 3, 4, 5, 10, 20, 100],
16+
"distribution": "log-normal",
17+
"boxmean": true
18+
},
19+
{
20+
"type": "box",
21+
"name": "Auto Dist (Linear)",
22+
"x": [3],
23+
"y": [1, 2, 3, 4, 5, 10, 20, 100],
24+
"distribution": "auto",
25+
"boxmean": true
26+
}
27+
],
28+
"layout": {
29+
"title": {
30+
"text": "Box Plot with Different Distribution Types (Linear Scale)"
31+
},
32+
"xaxis": {
33+
"title": "Distribution Type"
34+
},
35+
"yaxis": {
36+
"title": "Values"
37+
}
38+
}
39+
}
+40
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
{
2+
"data": [
3+
{
4+
"type": "box",
5+
"name": "Normal Dist (Log)",
6+
"x": [1],
7+
"y": [1, 2, 3, 4, 5, 10, 20, 100],
8+
"distribution": "normal",
9+
"boxmean": true
10+
},
11+
{
12+
"type": "box",
13+
"name": "Log-Normal Dist (Log)",
14+
"x": [2],
15+
"y": [1, 2, 3, 4, 5, 10, 20, 100],
16+
"distribution": "log-normal",
17+
"boxmean": true
18+
},
19+
{
20+
"type": "box",
21+
"name": "Auto Dist (Log)",
22+
"x": [3],
23+
"y": [1, 2, 3, 4, 5, 10, 20, 100],
24+
"distribution": "auto",
25+
"boxmean": true
26+
}
27+
],
28+
"layout": {
29+
"title": {
30+
"text": "Box Plot with Different Distribution Types (Log Scale)"
31+
},
32+
"xaxis": {
33+
"title": "Distribution Type"
34+
},
35+
"yaxis": {
36+
"type": "log",
37+
"title": "Values (log scale)"
38+
}
39+
}
40+
}

test/jasmine/tests/box_test.js

+178
Original file line numberDiff line numberDiff line change
@@ -1228,6 +1228,184 @@ describe('Test box calc', function() {
12281228
Plots.doCalcdata(gd);
12291229
return gd.calcdata[0];
12301230
}
1231+
1232+
it('should compute fence values differently depending on *distribution*', function() {
1233+
// Create a dataset that would have a negative lower fence with normal distribution
1234+
var y = [10, 20, 30, 40, 1000];
1235+
1236+
// Test with normal distribution
1237+
var cd = _calc({
1238+
y: y,
1239+
distribution: 'normal'
1240+
});
1241+
// The normal distribution fence could potentially be negative
1242+
1243+
// Test with log-normal distribution
1244+
var cd2 = _calc({
1245+
y: y,
1246+
distribution: 'log-normal'
1247+
});
1248+
// The log-normal lower fence should be higher (not negative)
1249+
expect(cd2[0].lf).toBeGreaterThan(0, 'log-normal distribution lower fence is positive');
1250+
1251+
// Skip test with negative values as the implementation gracefully handles them via Math.max
1252+
1253+
// Test auto distribution on a log axis
1254+
var cd4 = _calc({
1255+
y: y,
1256+
distribution: 'auto'
1257+
}, {
1258+
yaxis: {type: 'log'}
1259+
});
1260+
// Should use log-normal distribution
1261+
expect(cd4[0].lf).toBeGreaterThan(0, 'auto distribution on log axis');
1262+
expect(cd4[0].lf).toBeCloseTo(cd2[0].lf, 6, 'auto distribution equals log-normal on log axis');
1263+
});
1264+
1265+
it('should prevent negative whiskers with log-normal distribution', function() {
1266+
// This dataset would produce negative lower fence with normal distribution calculation
1267+
// (but the implementation will clamp to the minimum value)
1268+
var dataset = [2, 3, 5, 10, 200];
1269+
1270+
// Calculate with normal distribution
1271+
var cdNormal = _calc({
1272+
y: dataset,
1273+
distribution: 'normal'
1274+
});
1275+
1276+
// Calculate with log-normal distribution
1277+
var cdLogNormal = _calc({
1278+
y: dataset,
1279+
distribution: 'log-normal'
1280+
});
1281+
1282+
// Verify log-normal lower fence is positive
1283+
expect(cdLogNormal[0].lf).toBeGreaterThan(0, 'log-normal lower fence is positive');
1284+
});
1285+
1286+
it('should set usesLogNormal flag correctly for log-normal distribution', function() {
1287+
// Use a typical log-normally distributed dataset
1288+
var dataset = [1, 2, 5, 10, 20, 50, 100];
1289+
1290+
var cd = _calc({
1291+
y: dataset,
1292+
distribution: 'log-normal'
1293+
});
1294+
1295+
// Verify the usesLogNormal flag is set
1296+
expect(cd[0].usesLogNormal).toBe(true, 'usesLogNormal flag is set for log-normal distribution');
1297+
1298+
// Check that the fence values are reasonable
1299+
expect(cd[0].lf).toBeGreaterThan(0, 'log-normal lower fence is positive');
1300+
expect(cd[0].lf).toBeLessThan(cd[0].q1, 'lower fence is less than q1');
1301+
expect(cd[0].uf).toBeGreaterThan(cd[0].q3, 'upper fence is greater than q3');
1302+
});
1303+
1304+
it('should use correct distribution mode for auto setting', function() {
1305+
var dataset = [1, 2, 5, 10, 20, 50, 100];
1306+
1307+
// Test on linear axis
1308+
var cdLinear = _calc({
1309+
y: dataset,
1310+
distribution: 'auto'
1311+
}, {
1312+
yaxis: {type: 'linear'}
1313+
});
1314+
1315+
// Calculate with explicitly set normal distribution
1316+
var cdNormal = _calc({
1317+
y: dataset,
1318+
distribution: 'normal'
1319+
});
1320+
1321+
// Verify auto on linear axis uses normal distribution
1322+
expect(cdLinear[0].lf).toBeCloseTo(cdNormal[0].lf, 6, 'auto distribution equals normal on linear axis');
1323+
expect(cdLinear[0].uf).toBeCloseTo(cdNormal[0].uf, 6, 'auto distribution equals normal on linear axis');
1324+
1325+
// Test on log axis
1326+
var cdLog = _calc({
1327+
y: dataset,
1328+
distribution: 'auto'
1329+
}, {
1330+
yaxis: {type: 'log'}
1331+
});
1332+
1333+
// Calculate with explicitly set log-normal distribution
1334+
var cdLogNormal = _calc({
1335+
y: dataset,
1336+
distribution: 'log-normal'
1337+
});
1338+
1339+
// Verify auto on log axis uses log-normal distribution
1340+
expect(cdLog[0].lf).toBeCloseTo(cdLogNormal[0].lf, 6, 'auto distribution equals log-normal on log axis');
1341+
expect(cdLog[0].uf).toBeCloseTo(cdLogNormal[0].uf, 6, 'auto distribution equals log-normal on log axis');
1342+
});
1343+
1344+
it('should correctly handle explicit fence values', function() {
1345+
var dataset = [1, 2, 5, 10, 20, 50, 100];
1346+
1347+
// With normal distribution and no explicit fences (baseline)
1348+
var cdNormalBaseline = _calc({
1349+
y: dataset,
1350+
distribution: 'normal'
1351+
});
1352+
1353+
// With log-normal distribution and no explicit fences (baseline)
1354+
var cdLogNormalBaseline = _calc({
1355+
y: dataset,
1356+
distribution: 'log-normal'
1357+
});
1358+
1359+
// Explicit fences must be valid: lower fence <= q1 and upper fence >= q3
1360+
var validLowerFence = cdNormalBaseline[0].q1;
1361+
var validUpperFence = cdNormalBaseline[0].q3;
1362+
1363+
// With normal distribution and valid explicit fences
1364+
var cdNormal = _calc({
1365+
y: dataset,
1366+
distribution: 'normal',
1367+
lowerfence: [validLowerFence],
1368+
upperfence: [validUpperFence]
1369+
});
1370+
1371+
// With log-normal distribution and valid explicit fences
1372+
var cdLogNormal = _calc({
1373+
y: dataset,
1374+
distribution: 'log-normal',
1375+
lowerfence: [validLowerFence],
1376+
upperfence: [validUpperFence]
1377+
});
1378+
1379+
// Verify explicit fence values are used when valid
1380+
expect(cdNormal[0].lf).toEqual(validLowerFence, 'normal distribution uses valid explicit lower fence');
1381+
expect(cdNormal[0].uf).toEqual(validUpperFence, 'normal distribution uses valid explicit upper fence');
1382+
expect(cdLogNormal[0].lf).toEqual(validLowerFence, 'log-normal distribution uses valid explicit lower fence');
1383+
expect(cdLogNormal[0].uf).toEqual(validUpperFence, 'log-normal distribution uses valid explicit upper fence');
1384+
});
1385+
1386+
it('should handle extreme data distributions correctly', function() {
1387+
// Very skewed dataset that would have strongly negative whiskers with normal distribution
1388+
var extremeDataset = [1, 2, 3, 4, 5, 1000, 2000, 5000];
1389+
1390+
// With normal distribution
1391+
var cdNormal = _calc({
1392+
y: extremeDataset,
1393+
distribution: 'normal'
1394+
});
1395+
1396+
// With log-normal distribution
1397+
var cdLogNormal = _calc({
1398+
y: extremeDataset,
1399+
distribution: 'log-normal'
1400+
});
1401+
1402+
// Verify log-normal gives reasonable positive whiskers
1403+
expect(cdLogNormal[0].lf).toBeGreaterThan(0, 'log-normal gives positive lower fence for extreme data');
1404+
1405+
// Verify usesLogNormal flag is set correctly
1406+
expect(cdNormal[0].usesLogNormal).toBe(false, 'normal distribution sets flag to false');
1407+
expect(cdLogNormal[0].usesLogNormal).toBe(true, 'log-normal distribution sets flag to true');
1408+
});
12311409

12321410
it('should compute q1/q3 depending on *quartilemethod*', function() {
12331411
// samples from https://en.wikipedia.org/wiki/Quartile

0 commit comments

Comments
 (0)