-
Notifications
You must be signed in to change notification settings - Fork 96
/
Mondriaan.sas
505 lines (499 loc) · 20.4 KB
/
Mondriaan.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
%MACRO MONDRIAAN(
DATA=_LAST_,
XVAR=,YVAR=,SHARE=,RESPONSE=,
XREST=1,YREST=1,
XRESTLBL=,YRESTLBL=,
XORDER=DATA,
YORDER=DATA,
LBLFONT=,LBLSIZE=,LBLFMT=,LOFLOW=,
MIDPOINT=,LEGEND=,DISCRETE=,MIDPTFMT=,
LGNDFONT=,LGNDSIZE=,
RESPLBL=,RESPDATA=,
DRILDATA=,HTMLVAR=
);
%*--------------------------------------------------------------;
%* Macro XYCHART produces an area chart which depicts ;
%* the percentage share of the X variable horizontally and ;
%* the percentage share of the Y variable as subdivision of ;
%* X vertically. Both add up to 100% ;
%* Optionally it can also color/fill the thus created cells to ;
%* indicate another quantity, e.g. performance compared to ;
%* the share. To make use of this option it is best to summarize;
%* the input dataset to contain only one observation per cell ;
%* otherwise the value of the first observation for a cell will ;
%* be used for the coloring. Use PATTERN statements before ;
%* calling the XYCHART macro to specify the filling color and/or;
%* patterns. ;
%* ;
%* Parameters: DATA= the input dataset ;
%* XVAR= classification variable in X dimension ;
%* YVAR= classification variable in Y dimension ;
%* SHARE= the share value of the X-Y combination ;
%* RESPONSE= Variable in the input dataset that ;
%* will function as response variable to ;
%* color (or gray) the squares in the ;
%* chart ;
%* XREST= the cut-off percentage to include the ;
%* X classification in a rest-bucket ;
%* YREST= the cut-off percentage to include the ;
%* Y classification in a rest-bucket ;
%* XRESTLBL= Label for rest-bucket in X dimension ;
%* YRESTLBL= Label for rest-bucket in Y dimension ;
%* XORDER= Sort order for the columns ;
%* Options: ;
%* DATA (default): alphabetic ;
%* DESCENDING: descending value of the ;
%* XVAR variable share ;
%* ASCENDING: ascending value of the ;
%* XVAR variable share ;
%* YORDER= Sort order for the YVAR stacking in ;
%* the columns. Options as for XORDER ;
%* LBLFONT= Specifies the font for the labeling of ;
%* the cells of the chart. Default: SWISS ;
%* LBLSIZE= Charactersize for cel and column labels;
%* default: 0.5 ;
%* LBLFMT= Format for share data in cells ;
%* Default value: 5.1 ;
%* LOFLOW= Print labels also if they do not fit in;
%* the cell area. Value Y or N (default) ;
%* MIDPOINT= Defines the midpoints to be used for ;
%* the response variable. If not specified;
%* SAS will determine the midpoints. ;
%* LEGEND= Used to suppress the standard legend ;
%* Values: Y (yes, print legend) ;
%* Values: N (no legend, default) ;
%* DISCRETE= Used in combination with a numeric ;
%* response variable. It specifies that ;
%* each value of the response variable ;
%* is treated as a different level. ;
%* In combination with a format (MIDPTFMT ;
%* option) it will create a level for ;
%* each formatted value of the response ;
%* variable. ;
%* MIDPTFMT= Specifies format to be applied to the ;
%* response variable. Combine with ;
%* DISCRETE option to create a level for ;
%* each formatted value of the response ;
%* variable. ;
%* LGNDFONT= Specifies the font to be used in a ;
%* legend. Default SWISS font ;
%* LGNDSIZE= Specifies the fontsize to be used in a ;
%* legend. Default 0.6 ;
%* RESPDATA= Specifies a data set with additional ;
%* levels for the response variables to ;
%* make the patterns consistent also when ;
%* certain levels are missing. ;
%* DRILDATA= Names a data set that contains tool ;
%* tips and/or drill down data. The data ;
%* set should contain an observation for ;
%* each XVAR and YVAR combination. The ;
%* required variables are those specified ;
%* in the XVAR, YVAR and HTMLVAR options ;
%* HTMLVAR= Nominates the variable used for the ;
%* drill down information. ;
%* Default: HTMLVAR ;
%* ;
%* Copyright: Erik Tilanus ;
%*--------------------------------------------------------------;
%* Initialization: verify that all parameters are defined and ;
%* put a note to make defaults visible. ;
%*--------------------------------------------------------------;
%local __ERROR;
%let __ERROR = 0;
%if &xvar= or &yvar= or &share= %then %do;
%let __error = 1;
%put ERROR: XVAR, YVAR or SHARE parameter missing;
%end;
%if &response = %then %do;
%put NOTE: RESPONSE= not specified, squares are not colored;
%end;
%if &xrestlbl= %then %do;
%let xrestlbl = '***';
%put NOTE: XRESTLBL= not specified, '***' assumed;
%end;
%if &yrestlbl= %then %do;
%let yrestlbl = '***';
%put NOTE: YRESTLBL= not specified, '***' assumed;
%end;
%let xorder = %upcase(&xorder);
%if &xorder = DATA or &xorder = ASCENDING or &xorder = DESCENDING
%then;
%else %do;
%put ERROR: XORDER should be DATA, ASCENDING or DESCENDING;
%let __ERROR = 1;
%end;
%let yorder = %upcase(&yorder);
%if &yorder = DATA or &yorder = ASCENDING or &yorder = DESCENDING
%then;
%else %do;
%put ERROR: YORDER should be DATA, ASCENDING or DESCENDING;
%let __ERROR = 1;
%end;
%if &lblfont = %then %do;
%let lblfont = SWISS;
%put NOTE: LBLFONT= not specified, SWISS assumed;
%end;
%if &lblsize = %then %do;
%let lblsize = .5;
%put NOTE: LBLSIZE= not specified, 0.5 assumed;
%end;
%if &lblfmt = %then %do;
%let lblfmt = 5.1;
%put NOTE: LBLFMT= not specified, 5.1 assumed;
%end;
%if &loflow = %then %do;
%let loflow = N;
%put NOTE: LOFLOW= not specified, N (NO) assumed;
%end;
%let loflow = %upcase(%substr(&loflow,1,1));
%if &loflow = N or &loflow = Y %then;
%else %do;
%PUT ERROR: LOFLOW= should be Y or N;
%let __ERROR = 1;
%end;
%if &midpoint = %then %do;
%put NOTE: MIDPOINT= not specified, midpoints are calculated;
%end;
%if &midptfmt = %then %do;
%put NOTE: MIDPTFMT= not specified, no format applied;
%end;
%if &discrete = %then %do;
%let discrete = N;
%put NOTE: DISCRETE= not specified, N assumed;
%end;
%let discrete = %upcase(%substr(&discrete,1,1));
%if &discrete = N or &discrete = Y %then;
%else %do;
%PUT ERROR: DISCRETE= should be Y or N;
%let __ERROR = 1;
%end;
%if &lgndfont = %then %do;
%let lgndfont = SWISS;
%PUT NOTE: LGNDFONT= not specified, SWISS assumed;
%end;
%if &lgndsize = %then %do;
%let lgndsize = 0.6;
%put NOTE: LGNDSIZE= not specified, 0.6 assumed;
%end;
%if &resplbl = %then
%put NOTE: RESPLBL= not specified, default label applied;
%if &respdata = %then
%put NOTE: RESPDATA= not specified, may lead to inconsistent colors;
%if &drildata = %then
%put NOTE: DRILDATA= not specified, no drill down information created;
%if &htmlvar = %then %do;
%if &drildata = %then;
%else %do;
%let htmlvar = HTMLVAR;
%put NOTE: HTMLVAR= not specified, HTMLVAR assumed;
%end;
%end;
%else %do;
%if &drildata = %then
%put NOTE: No DRILDATA= specified, HTMLVAR= ignored;
%end;
%*--------------------------------------------------------------;
%* The parameters are checked. If the error flag is not set ;
%* we start the real work. First step: calculate share for each ;
%* value of the X classification variable, using PROC FREQ ;
%*--------------------------------------------------------------;
%if &__ERROR eq 1 %then %goto exit;
PROC FREQ DATA=&DATA;
TABLE &XVAR/OUT=_&SYSDATE NOPRINT;
WEIGHT &SHARE;
RUN;
%*--------------------------------------------------------------;
%* combine smaller units into rest group ;
%* label rest group temporarily with a high value label ('FF'X) ;
%*--------------------------------------------------------------;
DATA _&SYSDATE;
SET;
IF PERCENT LE &XREST THEN &XVAR='FF'X;
RUN;
%*--------------------------------------------------------------;
%* collect the new list of x classifications and merge with ;
%* the original to change the X var to the rest group ;
%*--------------------------------------------------------------;
PROC SORT NODUPKEY OUT=_&SYSDATE(KEEP=&XVAR);
BY &XVAR;
RUN;
PROC SORT DATA=&DATA OUT=D&SYSDATE;
BY &XVAR;
RUN;
DATA _&SYSDATE;
MERGE D&SYSDATE(IN=A) _&SYSDATE(IN=C);
BY &XVAR;
IF A AND NOT C THEN &XVAR='FF'X;
RUN;
%*--------------------------------------------------------------;
%* Redo PROC FREQ for the new x classification list ;
%*--------------------------------------------------------------;
PROC FREQ;
TABLE &XVAR/OUT=X&SYSDATE NOPRINT;
WEIGHT &SHARE;
RUN;
%*--------------------------------------------------------------;
%* Now determine share of YVAR for each column ;
%* Again first time for all values of YVAR ;
%* This is done in PROC SUMMARY rather than PROC FREQ to be ;
%* able to copy the response variable value. ;
%*--------------------------------------------------------------;
PROC SUMMARY DATA=_&SYSDATE;
CLASS &XVAR &YVAR;
VAR &SHARE
%IF &RESPONSE NE %THEN &RESPONSE;
;
FREQ &SHARE;
OUTPUT OUT=_&SYSDATE N(&SHARE)=
%IF &RESPONSE NE %THEN MEAN(&RESPONSE)= ;
;
RUN;
%*--------------------------------------------------------------;
%* Sort this result and kick out _TYPE_ =0 or 1 ;
%* _TYPE_ = 2 contains the denominator upon which ;
%* to determine the the percentages for _TYPE_=3 observations ;
%*--------------------------------------------------------------;
PROC SORT;
BY &XVAR _TYPE_;
WHERE _TYPE_ GE 2;
RUN;
%*--------------------------------------------------------------;
%* Kick out the Y classification with contribution less then ;
%* the cut-off value (YREST) ;
%* Label the rest group again with 'FF'X temporarily ;
%*--------------------------------------------------------------;
DATA _&SYSDATE;
RETAIN DENOM;
SET;
BY &XVAR;
IF FIRST.&XVAR THEN DENOM=&SHARE;
IF &SHARE*100/DENOM LE &YREST THEN &YVAR='FF'X;
IF _TYPE_=3 THEN OUTPUT;
RUN;
%*--------------------------------------------------------------;
%* Rerun PROC SUMMARY and sort it again, select _TYPE_ as above ;
%*--------------------------------------------------------------;
PROC SUMMARY DATA=_&SYSDATE;
CLASS &XVAR &YVAR;
VAR &SHARE
%IF &RESPONSE NE %THEN &RESPONSE;
;
FREQ &SHARE;
OUTPUT OUT=Q&SYSDATE N(&SHARE)=
%IF &RESPONSE NE %THEN MEAN(&RESPONSE)= ;
;
RUN;
PROC SORT;
BY &XVAR _TYPE_;
WHERE _TYPE_ GE 2;
RUN;
%*--------------------------------------------------------------;
%* Calculate the percentages in Y direction ;
%*--------------------------------------------------------------;
DATA Y&SYSDATE;
RETAIN DENOM;
SET;
BY &XVAR;
IF FIRST.&XVAR THEN DENOM=&SHARE;
PERCENT = &SHARE*100/DENOM;
IF _TYPE_=3 THEN OUTPUT;
RUN;
%*--------------------------------------------------------------;
%* Determine sort order for X axis and Y axis ;
%*--------------------------------------------------------------;
PROC SORT DATA=Y&SYSDATE OUT=Y&SYSDATE(INDEX=(&XVAR));
%IF %UPCASE(&YORDER) = DESCENDING %THEN %DO;
BY DESCENDING PERCENT;
%END;
%ELSE %DO;
%IF &YORDER = ASCENDING %THEN %DO;
BY PERCENT;
%END;
%ELSE %DO;
BY &YVAR;
%END;
%END;
PROC SORT DATA=X&SYSDATE;
%IF &XORDER = DESCENDING %THEN %DO;
BY DESCENDING PERCENT;
%END;
%ELSE %DO;
%IF &XORDER = ASCENDING %THEN %DO;
BY PERCENT;
%END;
%ELSE %DO;
BY &XVAR;
%END;
%END;
%*--------------------------------------------------------------;
%* Verify the drill down data set and create an index on it ;
%*--------------------------------------------------------------;
%IF &DRILDATA NE %THEN %DO;
PROC SORT DATA=&DRILDATA OUT=H&SYSDATE(INDEX=(CELL=(&XVAR &YVAR))) NODUPKEY;
BY &XVAR &YVAR;
RUN;
%END;
%*--------------------------------------------------------------;
%* Prepare the map, response and annotate dataset ;
%*--------------------------------------------------------------;
DATA M&SYSDATE(KEEP= X Y &XVAR &YVAR)
C&SYSDATE(KEEP= &XVAR &YVAR RESPONSE %IF &DRILDATA NE %THEN &HTMLVAR; )
A&SYSDATE(KEEP= X Y FUNCTION XSYS YSYS STYLE POSITION WHEN
TEXT SIZE);
SET X&SYSDATE(RENAME=(PERCENT=X_SHR));
BY &XVAR NOTSORTED;
RETAIN LLX LLY;
RETAIN RESPONSE 1;
RETAIN RESTTAG 'FF'X;
RETAIN FUNCTION 'LABEL ' XSYS '2' YSYS '2'
STYLE 'SWISS ' WHEN 'A';
RETAIN SIZE 0.25;
LENGTH TEXT $16;
IF _N_=1 THEN DO;
LLX = 0;
LLY = 0;
END;
DO UNTIL (_IORC_ NE 0);
SET Y&SYSDATE(RENAME=(PERCENT=Y_SHR )) KEY=&XVAR;
IF _IORC_ THEN LEAVE;
%*--------------------------------------------------------------;
%* Following routines write out the map datasets, square by ;
%* square, starting in the lower left corner of each square. ;
%* ;
%* With all coordinate manipulations we have to check whether ;
%* it is a rest group or a regular group. ;
%* For rest groups the coordinaed are calculated from the end ;
%* i.e. 150 on the X axis and 100 on the Y axis ;
%*--------------------------------------------------------------;
%*--------------------------------------------------------------;
%* Lower left corner ;
%*--------------------------------------------------------------;
IF &XVAR =: RESTTAG THEN X = 150 - X_SHR*1.5; ELSE X = LLX;
IF &YVAR =: RESTTAG THEN Y = 100 - Y_SHR; ELSE Y = LLY;
OUTPUT M&SYSDATE;
%*--------------------------------------------------------------;
%* Upper left corner ;
%*--------------------------------------------------------------;
IF &YVAR =: RESTTAG THEN Y = 100; ELSE Y = LLY + Y_SHR;
OUTPUT M&SYSDATE;
%*--------------------------------------------------------------;
%* Upper right corner ;
%*--------------------------------------------------------------;
IF &XVAR =: RESTTAG THEN X=150; ELSE X = LLX + X_SHR*1.5;
OUTPUT M&SYSDATE;
%*--------------------------------------------------------------;
%* Lower right corner ;
%*--------------------------------------------------------------;
IF &YVAR =: RESTTAG THEN Y = 100 - Y_SHR; ELSE Y = LLY;
OUTPUT M&SYSDATE;
%*--------------------------------------------------------------;
%* Write observation to response dataset ;
%* If there is a drill down data set, then get the information ;
%*--------------------------------------------------------------;
%if &response ne %then RESPONSE = &RESPONSE %str(;);
%else RESPONSE = 0 %str(;);
%IF &DRILDATA NE %THEN %DO;
SET H&SYSDATE(KEEP=&XVAR &YVAR &HTMLVAR) KEY=CELL/UNIQUE ;
IF _IORC_ THEN DO;
_IORC_ = 0;
&HTMLVAR = ' ';
END;
%END;
OUTPUT C&SYSDATE;
%*--------------------------------------------------------------;
%* Write observations to annotate dataset ;
%* In vertical direction the text anchor is above the midpoint ;
%* of the cell. This provides the best positioning in case the ;
%* cell space is cramped ;
%*--------------------------------------------------------------;
IF &XVAR =: RESTTAG THEN X = 150 - .5*X_SHR*1.5;
ELSE X = LLX + .5*X_SHR*1.5;
IF &YVAR =: RESTTAG THEN Y = 100 - .35*Y_SHR;
ELSE Y = LLY + .65*Y_SHR;
STYLE = "&LBLFONT";
SIZE = &LBLSIZE;
POSITION = 'B';
IF &YVAR =: RESTTAG THEN TEXT = &YRESTLBL;
ELSE TEXT = &YVAR;
%*--------------------------------------------------------------;
%* Look whether the information fits in the square before ;
%* outputing the information to the annotate set. ;
%* This is simply done by comparing height and width of the ;
%* text with the square size. No sophisticated algorithms to ;
%* take into account the individual character width. ;
%* This can be blocked with the LOFLOW option ;
%*--------------------------------------------------------------;
%IF &LOFLOW EQ N %THEN %DO;
IF Y - LLY GT SIZE*3.5 AND (X-LLX)*2 GT LENGTH(TEXT)*SIZE THEN
%END;
OUTPUT A&SYSDATE;
POSITION = 'E';
TEXT = PUT(&SHARE,&LBLFMT);
%IF &LOFLOW EQ N %THEN %DO;
IF Y - LLY GT SIZE*3.5 AND (X-LLX)*2 GT LENGTH(TEXT)*SIZE THEN
%END;
OUTPUT A&SYSDATE;
%*--------------------------------------------------------------;
%* Move lower left corner to next spot unless we just processed ;
%* the rest group of a column ;
%*--------------------------------------------------------------;
IF &YVAR =: RESTTAG THEN; ELSE LLY = LLY + Y_SHR;
END;
%*--------------------------------------------------------------;
%* Reset possible error flag ;
%*--------------------------------------------------------------;
_error_ = 0;
%*--------------------------------------------------------------;
%* If whole column created then note XVAR value at x-axis ;
%* and move LLX to the left coordinates of the next column ;
%* As the RESt column is always at the right end of the map ;
%* calculate then the coordinate from the end and dont update ;
%* the lower left X coordinate ;
%*--------------------------------------------------------------;
IF LAST.&XVAR THEN DO;
IF &XVAR =: RESTTAG THEN X = 150 - .5*X_SHR*1.5;
ELSE X = LLX + .5*X_SHR*1.5;
Y = 0;
SIZE=1.2*&LBLSIZE;
POSITION = 'E';
IF &XVAR =: RESTTAG THEN TEXT = &XRESTLBL;
ELSE TEXT = &XVAR;
%IF &LOFLOW EQ N %THEN %DO;
IF (X-LLX)*2 GT LENGTH(TEXT)*SIZE THEN
%END;
OUTPUT A&SYSDATE;
IF &XVAR =: RESTTAG THEN; ELSE LLX = LLX + X_SHR*1.5;
LLY = 0;
END;
RUN;
%IF &RESPDATA ne %THEN %DO;
DATA C&SYSDATE;
SET C&SYSDATE &RESPDATA;
RUN;
%END;
PROC GMAP MAP=M&SYSDATE DATA=C&SYSDATE ANNO=A&SYSDATE;
ID &XVAR &YVAR;
%*-------------------------------------------------------------;
%* If no further options required then stop here and let the ;
%* user define the rest of the GMAP specifications ;
*--------------------------------------------------------------;
CHORO RESPONSE/CTEXT=BLACK COUTLINE=BLACK
%IF &MIDPOINT NE %THEN MIDPOINTS=&MIDPOINT;
%IF %UPCASE(&LEGEND) NE Y %THEN NOLEGEND;
%ELSE LEGEND=LEGEND1;
%IF %UPCASE(&DISCRETE) EQ Y %THEN DISCRETE;
%IF &DRILDATA NE %THEN HTML = &HTMLVAR;
;
%IF %UPCASE(&LEGEND) EQ Y %THEN
LEGEND LABEL=(F=&LGNDFONT H=&LGNDSIZE &RESPLBL)
VALUE=(F=&LGNDFONT H=&LGNDSIZE) %STR(;) ;
%IF &MIDPTFMT NE %THEN
FORMAT RESPONSE &MIDPTFMT.. %STR(;) ;
%IF &RESPLBL NE %THEN LABEL RESPONSE = &RESPLBL %STR(;) ;
RUN;
QUIT;
%*--------------------------------------------------------------;
%* This is where we go, if one of the parameters is in error ;
%*--------------------------------------------------------------;
%exit:
%MEND;